From 7581ef9ea3789bbeada34bccf9add41731346361 Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Tue, 21 Mar 2023 10:08:52 +0800
Subject: [PATCH] [XPU] add fp16 support for compare ops. (#51846)

* [XPU] add fp16 support for compare ops.

* fix ci.
---
 paddle/phi/backends/xpu/xpu2_op_list.cc     |  6 +++
 paddle/phi/kernels/xpu/compare_kernel.cc    | 46 +++++++++++------
 paddle/phi/kernels/xpu/cum_kernel.cc        | 51 ++++---------------
 .../xpu/test_gaussian_random_op_xpu.py      | 12 ++---
 .../unittests/xpu/test_set_value_op_xpu.py  |  6 +--
 5 files changed, 56 insertions(+), 65 deletions(-)

diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 86739d05fb5..cf0a9b65645 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -241,6 +241,7 @@ XPUOpMap& get_kl2_ops() {
     {"equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"exp_grad", XPUKernelSet({phi::DataType::FLOAT32})},
     {"exp", XPUKernelSet({phi::DataType::FLOAT32})},
@@ -371,10 +372,12 @@ XPUOpMap& get_kl2_ops() {
     {"greater_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"greater_than",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})},
     {"grid_sampler", XPUKernelSet({phi::DataType::FLOAT32})},
@@ -419,10 +422,12 @@ XPUOpMap& get_kl2_ops() {
     {"less_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"less_than",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"load", XPUKernelSet({phi::DataType::FLOAT32})},
     {"load_combine",
@@ -489,6 +494,7 @@ XPUOpMap& get_kl2_ops() {
     {"not_equal",
      XPUKernelSet({phi::DataType::INT64,
                    phi::DataType::INT32,
+                   phi::DataType::FLOAT16,
                    phi::DataType::FLOAT32})},
     {"one_hot", XPUKernelSet({phi::DataType::INT32, phi::DataType::INT64})},
     {"one_hot_v2",
diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc
index a41bf25449f..4c9900cffa5 100644
--- a/paddle/phi/kernels/xpu/compare_kernel.cc
+++ b/paddle/phi/kernels/xpu/compare_kernel.cc
@@ -89,8 +89,14 @@ DEFINE_XPU_COMPARE_KERNEL(GreaterEqual, xpu::broadcast_greater_equal)
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    less_than, XPU, ALL_LAYOUT, phi::LessThanKernel, int, int64_t, float) {
+PD_REGISTER_KERNEL(less_than,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::LessThanKernel,
+                   int,
+                   int64_t,
+                   float,
+                   phi::dtype::float16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);
 }
 
@@ -100,23 +106,31 @@ PD_REGISTER_KERNEL(less_than_raw,
                    phi::LessThanRawKernel,
                    int,
                    int64_t,
-                   float) {
+                   float,
+                   phi::dtype::float16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);
 }
 
-#define PD_REGISTER_COMPARE_KERNEL(name, func)                         \
-  PD_REGISTER_KERNEL(                                                  \
-      name, XPU, ALL_LAYOUT, phi::func##Kernel, int, int64_t, float) { \
-    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);              \
-  }                                                                    \
-  PD_REGISTER_KERNEL(name##_raw,                                       \
-                     XPU,                                              \
-                     ALL_LAYOUT,                                       \
-                     phi::func##RawKernel,                             \
-                     int,                                              \
-                     int64_t,                                          \
-                     float) {                                          \
-    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);              \
+#define PD_REGISTER_COMPARE_KERNEL(name, func)             \
+  PD_REGISTER_KERNEL(name,                                 \
+                     XPU,                                  \
+                     ALL_LAYOUT,                           \
+                     phi::func##Kernel,                    \
+                     int,                                  \
+                     int64_t,                              \
+                     float,                                \
+                     phi::dtype::float16) {                \
+    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);  \
+  }                                                        \
+  PD_REGISTER_KERNEL(name##_raw,                           \
+                     XPU,                                  \
+                     ALL_LAYOUT,                           \
+                     phi::func##RawKernel,                 \
+                     int,                                  \
+                     int64_t,                              \
+                     float,                                \
+                     phi::dtype::float16) {                \
+    kernel->OutputAt(0).SetDataType(phi::DataType::BOOL);  \
   }
 
 PD_REGISTER_COMPARE_KERNEL(less_equal, LessEqual)
diff --git a/paddle/phi/kernels/xpu/cum_kernel.cc b/paddle/phi/kernels/xpu/cum_kernel.cc
index 762eb06eb37..cadacf102a8 100644
--- a/paddle/phi/kernels/xpu/cum_kernel.cc
+++ b/paddle/phi/kernels/xpu/cum_kernel.cc
@@ -66,46 +66,17 @@ void CumsumKernel(const Context& dev_ctx,
     }
   }
 
-  // special for fp16
-  if (std::is_same<T, dtype::float16>::value) {
-    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
-    float* cast_input_fp32 = RAII_GUARD.alloc_l3_or_gm<float>(x.numel());
-    float* temp_result_fp32 = RAII_GUARD.alloc_l3_or_gm<float>(x.numel());
-    // cast to fp32
-    int r =
-        xpu::cast<XPUType, float>(dev_ctx.x_context(),
-                                  reinterpret_cast<const XPUType*>(x.data<T>()),
-                                  cast_input_fp32,
-                                  x.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-    // cumsum in fp32
-    r = xpu::cumsum<float>(dev_ctx.x_context(),
-                           cast_input_fp32,
-                           temp_result_fp32,
-                           x_shape,
-                           reverse,
-                           exclusive,
-                           axis_as_int);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
-    // cast back to fp16
-    r = xpu::cast<float, XPUType>(dev_ctx.x_context(),
-                                  temp_result_fp32,
-                                  reinterpret_cast<XPUType*>(out->data<T>()),
-                                  x.numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
-  } else {
-    // template <typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T*
-    // y, const std::vector<int>& xshape, bool reverse, bool exclusive, int
-    // axis);
-    int r = xpu::cumsum<XPUType>(dev_ctx.x_context(),
-                                 reinterpret_cast<const XPUType*>(x.data<T>()),
-                                 reinterpret_cast<XPUType*>(out->data<T>()),
-                                 x_shape,
-                                 reverse,
-                                 exclusive,
-                                 axis_as_int);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
-  }
+  // template <typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T*
+  // y, const std::vector<int>& xshape, bool reverse, bool exclusive, int
+  // axis);
+  int r = xpu::cumsum<XPUType>(dev_ctx.x_context(),
+                               reinterpret_cast<const XPUType*>(x.data<T>()),
+                               reinterpret_cast<XPUType*>(out->data<T>()),
+                               x_shape,
+                               reverse,
+                               exclusive,
+                               axis_as_int);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
 }
 
 }  // namespace phi
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
index 7280a4e80f0..7679baf1950 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_gaussian_random_op_xpu.py
@@ -265,9 +265,8 @@ class TestGaussianRandomAPI(unittest.TestCase):
 
         def test_default_fp16():
             paddle.framework.set_default_dtype('float16')
-            paddle.tensor.random.gaussian([2, 3])
-
-        self.assertRaises(TypeError, test_default_fp16)
+            out = paddle.tensor.random.gaussian([2, 3])
+            self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP16)
 
         def test_default_fp32():
             paddle.framework.set_default_dtype('float32')
@@ -281,6 +280,7 @@ class TestGaussianRandomAPI(unittest.TestCase):
 
         test_default_fp64()
         test_default_fp32()
+        test_default_fp16()
 
         paddle.enable_static()
 
@@ -291,9 +291,8 @@ class TestStandardNormalDtype(unittest.TestCase):
 
         def test_default_fp16():
             paddle.framework.set_default_dtype('float16')
-            paddle.tensor.random.standard_normal([2, 3])
-
-        self.assertRaises(TypeError, test_default_fp16)
+            out = paddle.tensor.random.standard_normal([2, 3])
+            self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP16)
 
         def test_default_fp32():
             paddle.framework.set_default_dtype('float32')
@@ -307,6 +306,7 @@ class TestStandardNormalDtype(unittest.TestCase):
 
         test_default_fp64()
         test_default_fp32()
+        test_default_fp16()
 
         paddle.enable_static()
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
index d8094e7ad71..7f18a297dd4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
@@ -1259,13 +1259,13 @@ class XPUTestSetValueOp(XPUOpTestWrapper):
             # test stop_gradient
             value.stop_gradient = True
             x.stop_gradient = False
-            start = paddle.tensor.layers.fill_constant(
+            start = paddle.tensor.fill_constant(
                 [1], "int32", 5, force_cpu=True
             )
-            end = paddle.tensor.layers.fill_constant(
+            end = paddle.tensor.fill_constant(
                 [1], "int32", 0, force_cpu=True
             )
-            step = paddle.tensor.layers.fill_constant(
+            step = paddle.tensor.fill_constant(
                 [1], "int32", -2, force_cpu=True
            )
--
GitLab
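
A minimal usage sketch of what this patch enables: elementwise compare ops on
float16 XPU tensors. This is illustrative only, not part of the patch; it
assumes a Paddle build with XPU support and an XPU device at index 0.

    import paddle

    paddle.set_device('xpu:0')  # assumption: an XPU device is present at index 0

    x = paddle.to_tensor([1.0, 2.0, 3.0], dtype='float16')
    y = paddle.to_tensor([3.0, 2.0, 1.0], dtype='float16')

    # Every compare kernel forces a BOOL output, matching the
    # kernel->OutputAt(0).SetDataType(phi::DataType::BOOL) calls above.
    print(paddle.less_than(x, y))      # [True, False, False]
    print(paddle.greater_equal(x, y))  # [False, True, True]
    print(paddle.equal(x, y))          # [False, True, False]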
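
The cum_kernel.cc change drops the cast-to-fp32 round-trip, so a float16
cumsum now calls xpu::cumsum directly. A hypothetical smoke test, under the
same device assumption and assuming the installed XDNN toolkit handles
float16 cumsum natively (which this change implies):

    import paddle

    paddle.set_device('xpu:0')  # assumption: an XPU device is present at index 0

    z = paddle.to_tensor([1.0, 2.0, 3.0, 4.0], dtype='float16')
    out = paddle.cumsum(z)  # inclusive prefix sum over the flattened tensor
    print(out)              # expect [1., 3., 6., 10.]
    print(out.dtype)        # expect paddle.float16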