Unverified commit a43f960e authored by houj04, committed by GitHub

[cherry-pick][xpu] update xdnn activations (#46282)

* [XPU] update xdnn activations. (#46246)

* [XPU] update xpu cmake. test=kunlun
Parent: adb2f5e6
@@ -9,8 +9,8 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
-      "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220907")
+      "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220919")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
@@ -19,7 +19,7 @@ endif()
 if(NOT DEFINED XPU_XDNN_BASE_URL)
   set(XPU_XDNN_BASE_URL_WITHOUT_DATE
       "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220907")
+  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220919")
 else()
   set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
 endif()
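Note: with these defaults, the XDNN package is fetched from https://klx-sdk-release-public.su.bcebos.com/xdnn/dev/20220919, and the SDK from the 20220919 path under the new su.bcebos.com base. Because both blocks only apply when the variable is NOT DEFINED, a mirror can still be selected by predefining XPU_BASE_URL or XPU_XDNN_BASE_URL, for example via -DXPU_BASE_URL=<mirror-url> (placeholder) at configure time; the else() branches simply keep the predefined value.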
@@ -82,18 +82,43 @@ int xpu_activation_func(
 }
 
 template <typename Context, typename T, typename XPUType>
-int xpu_activation_1attr_func(
+int xpu_activation_func_with_max_x_y(
     const Context& dev_ctx,
     const DenseTensor& x,
     DenseTensor* out,
-    float attr,
-    std::function<int(xpu::Context*, const XPUType*, XPUType*, int, float)>
+    std::function<
+        int(xpu::Context*, const XPUType*, XPUType*, int, const float*, float*)>
         func) {
+  // does not support "const float* max_x, float* max_y" now
   int r = func(dev_ctx.x_context(),
                reinterpret_cast<const XPUType*>(x.data<T>()),
                reinterpret_cast<XPUType*>(out->data<T>()),
                x.numel(),
-               attr);
+               nullptr,
+               nullptr);
   return r;
 }
+
+template <typename Context, typename T, typename XPUType>
+int xpu_activation_1attr_func(const Context& dev_ctx,
+                              const DenseTensor& x,
+                              DenseTensor* out,
+                              float attr,
+                              std::function<int(xpu::Context*,
+                                                const XPUType*,
+                                                XPUType*,
+                                                int,
+                                                float,
+                                                const float*,
+                                                float*)> func) {
+  // does not support "const float* max_x, float* max_y" now
+  int r = func(dev_ctx.x_context(),
+               reinterpret_cast<const XPUType*>(x.data<T>()),
+               reinterpret_cast<XPUType*>(out->data<T>()),
+               x.numel(),
+               attr,
+               nullptr,
+               nullptr);
+  return r;
+}
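Judging from the std::function types above, the updated xdnn activations now take trailing "const float* max_x, float* max_y" arguments, which both wrappers still pass as nullptr (see the in-code comments). Below is a minimal, self-contained sketch of this adapter pattern; FakeContext and fake_relu6 are stand-ins invented for illustration, not Paddle or xdnn APIs:

// Self-contained sketch of the "wrapper passes nullptr maxima" pattern.
#include <cstdio>
#include <functional>
#include <vector>

struct FakeContext {};  // stand-in for xpu::Context

// Stand-in for an xdnn activation with the new trailing max_x/max_y params.
int fake_relu6(FakeContext* /*ctx*/, const float* x, float* y, int len,
               const float* /*max_x*/, float* /*max_y*/) {
  for (int i = 0; i < len; ++i) {
    float v = x[i] < 0.f ? 0.f : x[i];
    y[i] = v > 6.f ? 6.f : v;  // clamp to [0, 6]
  }
  return 0;  // xdnn convention: 0 means success
}

// Mirrors xpu_activation_func_with_max_x_y: adapts a max_x/max_y-style
// kernel for callers that do not track tensor maxima yet.
int activation_with_max_x_y(
    FakeContext* ctx, const float* x, float* y, int len,
    std::function<int(FakeContext*, const float*, float*, int, const float*,
                      float*)> func) {
  return func(ctx, x, y, len, /*max_x=*/nullptr, /*max_y=*/nullptr);
}

int main() {
  FakeContext ctx;
  std::vector<float> x = {-1.f, 3.f, 9.f}, y(3);
  int r = activation_with_max_x_y(&ctx, x.data(), y.data(), 3, fake_relu6);
  std::printf("r=%d y=%.1f %.1f %.1f\n", r, y[0], y[1], y[2]);
  return r;
}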
@@ -213,7 +238,7 @@ struct XPUHardSwishFunctor : public funcs::BaseActivationFunctor<T> {
         offset,
         3.0f,
         errors::External("Not support offset [%f] in XPU", offset));
-    int r = xpu_activation_func<Context, T, XPUType>(
+    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
         dev_ctx, x, out, xpu::hard_swish<XPUType>);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "hard_swish");
   }
@@ -259,7 +284,7 @@ struct XPURelu6Functor : public funcs::BaseActivationFunctor<T> {
   void operator()(const Context& dev_ctx,
                   const DenseTensor& x,
                   DenseTensor* out) const {
-    int r = xpu_activation_func<Context, T, XPUType>(
+    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
         dev_ctx, x, out, xpu::relu6<XPUType>);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "relu6");
   }
@@ -272,7 +297,7 @@ struct XPUSigmoidFunctor : public funcs::BaseActivationFunctor<T> {
   void operator()(const Context& dev_ctx,
                   const DenseTensor& x,
                   DenseTensor* out) const {
-    int r = xpu_activation_func<Context, T, XPUType>(
+    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
         dev_ctx, x, out, xpu::sigmoid<XPUType>);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "sigmoid");
   }
@@ -363,7 +388,7 @@ struct XPUTanhFunctor : public funcs::BaseActivationFunctor<T> {
   void operator()(const Context& dev_ctx,
                   const DenseTensor& x,
                   DenseTensor* out) const {
-    int r = xpu_activation_func<Context, T, XPUType>(
+    int r = xpu_activation_func_with_max_x_y<Context, T, XPUType>(
         dev_ctx, x, out, xpu::tanh<XPUType>);
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "tanh");
   }
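For activations that carry one scalar attribute, the new xpu_activation_1attr_func follows the same pattern, forwarding the attribute ahead of the nullptr maxima. A companion self-contained sketch; fake_leaky_relu and its alpha parameter are illustrative stand-ins, not xdnn's actual API:

// Self-contained sketch of the one-attribute wrapper pattern.
#include <cstdio>
#include <functional>
#include <vector>

struct FakeContext {};  // stand-in for xpu::Context

// Stand-in for an xdnn activation taking one scalar attribute plus the
// new trailing max_x/max_y parameters.
int fake_leaky_relu(FakeContext* /*ctx*/, const float* x, float* y, int len,
                    float alpha, const float* /*max_x*/, float* /*max_y*/) {
  for (int i = 0; i < len; ++i) y[i] = x[i] >= 0.f ? x[i] : alpha * x[i];
  return 0;  // 0 means success
}

// Mirrors xpu_activation_1attr_func: forwards the attribute and passes
// nullptr for the unused maxima.
int activation_1attr(FakeContext* ctx, const float* x, float* y, int len,
                     float attr,
                     std::function<int(FakeContext*, const float*, float*,
                                       int, float, const float*, float*)>
                         func) {
  return func(ctx, x, y, len, attr, nullptr, nullptr);
}

int main() {
  FakeContext ctx;
  std::vector<float> x = {-2.f, 4.f}, y(2);
  int r = activation_1attr(&ctx, x.data(), y.data(), 2, 0.1f, fake_leaky_relu);
  std::printf("r=%d y=%.2f %.2f\n", r, y[0], y[1]);
  return r;
}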