From 7581ef9ea3789bbeada34bccf9add41731346361 Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Tue, 21 Mar 2023 10:08:52 +0800
Subject: [PATCH] [XPU] add fp16 support for compare ops. (#51846)

* [XPU] add fp16 support for compare ops.

* fix ci.
---
 paddle/phi/backends/xpu/xpu2_op_list.cc     |  6 +++
 paddle/phi/kernels/xpu/compare_kernel.cc    | 46 +++++++++++------
 paddle/phi/kernels/xpu/cum_kernel.cc        | 51 ++++---------------
 .../xpu/test_gaussian_random_op_xpu.py      | 12 ++---
 .../unittests/xpu/test_set_value_op_xpu.py  |  6 +--
 5 files changed, 56 insertions(+), 65 deletions(-)

diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 86739d05fb5..cf0a9b65645 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -241,6 +241,7 @@ XPUOpMap& get_kl2_ops() {
     {"equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"exp_grad", XPUKernelSet({phi::DataType::FLOAT32})},
     {"exp", XPUKernelSet({phi::DataType::FLOAT32})},
@@ -371,10 +372,12 @@ XPUOpMap& get_kl2_ops() {
     {"greater_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"greater_than",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})},
     {"grid_sampler", XPUKernelSet({phi::DataType::FLOAT32})},
@@ -419,10 +422,12 @@ XPUOpMap& get_kl2_ops() {
     {"less_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"less_than",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"load", XPUKernelSet({phi::DataType::FLOAT32})},
     {"load_combine",
@@ -489,6 +494,7 @@ XPUOpMap& get_kl2_ops() {
     {"not_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"one_hot", XPUKernelSet({phi::DataType::INT32, phi::DataType::INT64})},
     {"one_hot_v2",
diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc
index a41bf25449f..4c9900cffa5 100644
--- a/paddle/phi/kernels/xpu/compare_kernel.cc
+++ b/paddle/phi/kernels/xpu/compare_kernel.cc
@@ -89,8 +89,14 @@ DEFINE_XPU_COMPARE_KERNEL(GreaterEqual, xpu::broadcast_greater_equal)
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    less_than, XPU, ALL_LAYOUT, phi::LessThanKernel, int, int64_t, float) {
+PD_REGISTER_KERNEL(less_than,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::LessThanKernel,
+                   int,
+                   int64_t,
+                   float,
+                   phi::dtype::float16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);
 }
 
@@ -100,23 +106,31 @@ PD_REGISTER_KERNEL(less_than_raw,
                    phi::LessThanRawKernel,
                    int,
                    int64_t,
-                   float) {
+                   float,
+                   phi::dtype::float16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);
 }
 
-#define PD_REGISTER_COMPARE_KERNEL(name, func)                         \
-  PD_REGISTER_KERNEL(                                                  \
-      name, XPU, ALL_LAYOUT, phi::func##Kernel, int, int64_t, float) { \
-    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);              \
-  }                                                                    \
-  PD_REGISTER_KERNEL(name##_raw,                                       \
-                     XPU,                                              \
-                     ALL_LAYOUT,                                       \
-                     phi::func##RawKernel,                             \
-                     int,                                              \
-                     int64_t,                                          \
-                     float) {                                          \
-    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);              \
+#define PD_REGISTER_COMPARE_KERNEL(name, func)             \
+  PD_REGISTER_KERNEL(name,                                 \
+                     XPU,                                  \
+                     ALL_LAYOUT,                           \
+                     phi::func##Kernel,                    \
+                     int,                                  \
+                     int64_t,                              \
+                     float,                                \
+                     phi::dtype::float16) {                \
+    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);  \
+  }                                                        \
+  PD_REGISTER_KERNEL(name##_raw,                           \
+                     XPU,                                  \
+                     ALL_LAYOUT,                           \
+                     phi::func##RawKernel,                 \
+                     int,                                  \
+                     int64_t,                              \
+                     float,                                \
+                     phi::dtype::float16) {                \
+    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);  \
   }
 
 PD_REGISTER_COMPARE_KERNEL(less_equal, LessEqual)
diff --git a/paddle/phi/kernels/xpu/cum_kernel.cc b/paddle/phi/kernels/xpu/cum_kernel.cc
index 762eb06eb37..cadacf102a8 100644
--- a/paddle/phi/kernels/xpu/cum_kernel.cc
+++ b/paddle/phi/kernels/xpu/cum_kernel.cc
@@ -66,46 +66,17 @@ void CumsumKernel(const Context& dev_ctx,
     }
   }
 
-  // special for fp16
-  if (std::is_same<T, dtype::float16>::value) {
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    float* cast_input_fp32 = RAII_GUARD.alloc_l3_or_gm<float>(x.numel());
-    float* temp_result_fp32 = RAII_GUARD.alloc_l3_or_gm<float>(x.numel());
-    // cast to fp32
-    int r =
-        xpu::cast<XPUType, float>(dev_ctx.x_context(),
-                                  reinterpret_cast<const XPUType*>(x.data<T>()),
-                                  cast_input_fp32,
-                                  x.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-    // cumsum in fp32
-    r = xpu::cumsum<float>(dev_ctx.x_context(),
-                           cast_input_fp32,
-                           temp_result_fp32,
-                           x_shape,
-                           reverse,
-                           exclusive,
-                           axis_as_int);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
-    // cast back to fp16
-    r = xpu::cast<float, XPUType>(dev_ctx.x_context(),
-                                  temp_result_fp32,
-                                  reinterpret_cast<XPUType*>(out->data<T>()),
-                                  x.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-  } else {
-    // template <typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T*
-    // y, const std::vector<int>& xshape, bool reverse, bool exclusive, int
-    // axis);
-    int r = xpu::cumsum<XPUType>(dev_ctx.x_context(),
-                                 reinterpret_cast<const XPUType*>(x.data<T>()),
-                                 reinterpret_cast<XPUType*>(out->data<T>()),
-                                 x_shape,
-                                 reverse,
-                                 exclusive,
-                                 axis_as_int);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
-  }
+  // template <typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T*
+  // y, const std::vector<int>& xshape, bool reverse, bool exclusive, int
+  // axis);
+  int r = xpu::cumsum<XPUType>(dev_ctx.x_context(),
+                               reinterpret_cast<const XPUType*>(x.data<T>()),
+                               reinterpret_cast<XPUType*>(out->data<T>()),
+                               x_shape,
+                               reverse,
+                               exclusive,
+                               axis_as_int);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
 }
 
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
index 7280a4e80f0..7679baf1950 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
@@ -265,9 +265,8 @@ class TestGaussianRandomAPI(unittest.TestCase):
 
         def test_default_fp16():
             paddle.framework.set_default_dtype('float16')
-            paddle.tensor.random.gaussian([2, 3])
-
-        self.assertRaises(TypeError, test_default_fp16)
+            out = paddle.tensor.random.gaussian([2, 3])
+            self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP16)
 
         def test_default_fp32():
             paddle.framework.set_default_dtype('float32')
@@ -281,6 +280,7 @@ class TestGaussianRandomAPI(unittest.TestCase):
 
         test_default_fp64()
         test_default_fp32()
+        test_default_fp16()
 
         paddle.enable_static()
 
@@ -291,9 +291,8 @@ class TestStandardNormalDtype(unittest.TestCase):
 
         def test_default_fp16():
             paddle.framework.set_default_dtype('float16')
-            paddle.tensor.random.standard_normal([2, 3])
-
-        self.assertRaises(TypeError, test_default_fp16)
+            out = paddle.tensor.random.standard_normal([2, 3])
+            self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP16)
 
         def test_default_fp32():
             paddle.framework.set_default_dtype('float32')
@@ -307,6 +306,7 @@ class TestStandardNormalDtype(unittest.TestCase):
 
         test_default_fp64()
         test_default_fp32()
+        test_default_fp16()
 
         paddle.enable_static()
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
index d8094e7ad71..7f18a297dd4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
@@ -1259,13 +1259,13 @@ class XPUTestSetValueOp(XPUOpTestWrapper):
             # test stop_gradient
             value.stop_gradient = True
             x.stop_gradient = False
-            start = paddle.tensor.layers.fill_constant(
+            start = paddle.tensor.fill_constant(
                 [1], "int32", 5, force_cpu=True
             )
-            end = paddle.tensor.layers.fill_constant(
+            end = paddle.tensor.fill_constant(
                 [1], "int32", 0, force_cpu=True
             )
-            step = paddle.tensor.layers.fill_constant(
+            step = paddle.tensor.fill_constant(
                 [1], "int32", -2, force_cpu=True
            )
--
GitLab
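
A minimal usage sketch of what this patch enables: elementwise compare ops on
float16 XPU tensors. This is illustrative only, not part of the patch; it
assumes a Paddle build with XPU support and an XPU device at index 0.

    import paddle

    paddle.set_device('xpu:0')  # assumption: an XPU device is present at index 0

    x = paddle.to_tensor([1.0, 2.0, 3.0], dtype='float16')
    y = paddle.to_tensor([3.0, 2.0, 1.0], dtype='float16')

    # Every compare kernel forces a BOOL output, matching the
    # kernel->OutputAt(0).SetDataType(phi::DataType::BOOL) calls above.
    print(paddle.less_than(x, y))      # [True, False, False]
    print(paddle.greater_equal(x, y))  # [False, True, True]
    print(paddle.equal(x, y))          # [False, True, False]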
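
The cum_kernel.cc change drops the cast-to-fp32 round-trip, so a float16
cumsum now calls xpu::cumsum directly. A hypothetical smoke test, under the
same device assumption and assuming the installed XDNN toolkit handles
float16 cumsum natively (which this change implies):

    import paddle

    paddle.set_device('xpu:0')  # assumption: an XPU device is present at index 0

    z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0], dtype='float16')
    out = paddle.cumsum(z)  # inclusive prefix sum over the flattened tensor
    print(out)              # expect [1., 3., 6., 10.]
    print(out.dtype)        # expect paddle.float16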