From 982d5ff77996a9c2ec8ef5f05cdb87849e03d979 Mon Sep 17 00:00:00 2001
From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com>
Date: Fri, 18 Nov 2022 11:25:40 +0800
Subject: [PATCH] cast and gradient_accumulator support double for xpu,
 test=kunlun (#47800)

---
 .../fluid/imperative/gradient_accumulator.cc  | 34 +++++++++++++++----
 .../fluid/platform/device/xpu/xpu2_op_list.h  |  2 ++
 paddle/phi/kernels/xpu/cast_kernel.cc         | 18 ++++++----
 .../unittests/xpu/test_adadelta_op_xpu.py     |  3 +-
 .../tests/unittests/xpu/test_cast_op_xpu.py   | 11 +++++-
 5 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
index 188617dd310..b57c874ceeb 100644
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -31,6 +31,7 @@
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "xpu/refactor/math.h"
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
@@ -92,13 +93,30 @@ void XPUTensorAddFunctor(const platform::Place& place,
       platform::DeviceContextPool::Instance().Get(place));
   const XPUType* x = reinterpret_cast<const XPUType*>(src.data<T>());
   XPUType* y = reinterpret_cast<XPUType*>(dst->mutable_data<T>(place));
-  int r = xpu::add<XPUType>(
-      ctx->x_context(), x, y, y, static_cast<int>(src.numel()));
-  PADDLE_ENFORCE_EQ(
-      r,
-      XPU_SUCCESS,
-      platform::errors::External(
-          "XPU add kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
+  int r = -1;
+  int numel = static_cast<int>(src.numel());
+  if (std::is_same<T, double>::value) {
+    xpu::ctx_guard RAII_GUARD(ctx->x_context());
+    float* x_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_cast_to_fp32);
+    float* y_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(y_cast_to_fp32);
+    r = xpu::cast<XPUType, float>(ctx->x_context(), x, x_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::cast<XPUType, float>(ctx->x_context(), y, y_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::add<float>(ctx->x_context(),
+                        x_cast_to_fp32,
+                        y_cast_to_fp32,
+                        y_cast_to_fp32,
+                        numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+    r = xpu::cast<float, XPUType>(ctx->x_context(), y_cast_to_fp32, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+  } else {
+    r = xpu::add<XPUType>(ctx->x_context(), x, y, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+  }
 }
 #endif
 
@@ -286,6 +304,8 @@ void TensorAdd(const VarType& src, VarType* dst) {
     } else if (data_type ==
                framework::DataTypeTrait<platform::float16>::DataType()) {
       XPUTensorAddFunctor<platform::float16>(place, src_tensor, dst_tensor);
+    } else if (data_type == framework::DataTypeTrait<double>::DataType()) {
+      XPUTensorAddFunctor<double>(place, src_tensor, dst_tensor);
     } else {
       PADDLE_THROW(platform::errors::Unimplemented(
           "Gradient accumulation of data type (%s) on place (%s) is not "
diff --git a/paddle/fluid/platform/device/xpu/xpu2_op_list.h b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
index cbcbde8f9dd..ae6d53989c3 100644
--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -103,7 +103,9 @@ XPUOpMap& get_kl2_ops() {
     {"cast",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace()),
+                   pOpKernelType(vartype::FP64, XPUPlace()),
                    pOpKernelType(vartype::BOOL, XPUPlace()),
+                   pOpKernelType(vartype::UINT8, XPUPlace()),
                    pOpKernelType(vartype::INT64, XPUPlace()),
                    pOpKernelType(vartype::INT32, XPUPlace())})},
{"check_finite_and_unscale", diff --git a/paddle/phi/kernels/xpu/cast_kernel.cc b/paddle/phi/kernels/xpu/cast_kernel.cc index 502b8324522..346cf4cd3bf 100644 --- a/paddle/phi/kernels/xpu/cast_kernel.cc +++ b/paddle/phi/kernels/xpu/cast_kernel.cc @@ -14,6 +14,7 @@ #include "paddle/phi/kernels/cast_kernel.h" +#include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" @@ -80,16 +81,19 @@ void CastKernel(const Context& dev_ctx, dev_ctx.template Alloc(out), numel); break; + case phi::DataType::FLOAT64: + r = xpu::cast_v2( + dev_ctx.x_context(), + reinterpret_cast(in_data), + dev_ctx.template Alloc(out), + numel); + break; default: PADDLE_THROW(phi::errors::Unavailable( "Not supported cast %d -> %d", x.dtype(), out_dtype)); } - PADDLE_ENFORCE_EQ( - r, - XPU_SUCCESS, - phi::errors::External( - "XPU CAST API return wrong value[%d %s].", r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2"); } } // namespace phi @@ -101,6 +105,8 @@ PD_REGISTER_KERNEL(cast, float, phi::dtype::float16, int64_t, - bool) { + bool, + uint8_t, + double) { kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); } diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py index d65e20522a2..4ded307cba9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py @@ -18,7 +18,6 @@ import sys sys.path.append("..") -from op_test import OpTest import paddle import paddle.fluid as fluid from op_test_xpu import XPUOpTest @@ -88,7 +87,7 @@ class XPUTestAdadelta(XPUOpTestWrapper): def test_check_output(self): self.check_output() - class TestAdadeltaOp2(OpTest): + class TestAdadeltaOp2(XPUOpTest): '''Test Adadelta op with default attribute values''' def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py index a2e136dccaa..a69c439c8cb 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py @@ -36,6 +36,7 @@ typeid_dict = { 'float16': int(core.VarDesc.VarType.FP16), 'bool': int(core.VarDesc.VarType.BOOL), 'uint8': int(core.VarDesc.VarType.UINT8), + 'float64': int(core.VarDesc.VarType.FP64), } @@ -47,7 +48,15 @@ class XPUTestCastOp(XPUOpTestWrapper): def dynamic_create_class(self): base_class = self.TestCastOp classes = [] - for out_type in {'float16', 'float32', 'int32', 'int64', 'uint8'}: + for out_type in { + 'float16', + 'float32', + 'int32', + 'int64', + 'uint8', + 'bool', + 'float64', + }: class_name = 'XPUTestCastOp_outtype_' + out_type attr_dict = {'out_typename': out_type} classes.append([class_name, attr_dict]) -- GitLab