cast and gradient_accumulator support double for xpu, test=kunlun (#47800)

982d5ff7 · zhangyikun02 · GitHub · 635958d9 · 982d5ff7 · 982d5ff7
5 changed files
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -31,6 +31,7 @@
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "xpu/refactor/math.h"
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
@@ -92,13 +93,30 @@ void XPUTensorAddFunctor(const platform::Place& place,
      platform::DeviceContextPool::Instance().Get(place));
  const XPUType* x = reinterpret_cast<const XPUType*>(src.data<T>());
  XPUType* y = reinterpret_cast<XPUType*>(dst->mutable_data<T>(place));
-  int r = xpu::add<XPUType>(
-      ctx->x_context(), x, y, y, static_cast<int>(src.numel()));
-  PADDLE_ENFORCE_EQ(
-      r,
-      XPU_SUCCESS,
-      platform::errors::External(
-          "XPU add kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
+  int r = -1;
+  int numel = static_cast<int>(src.numel());
+  if (std::is_same<T, double>::value) {
+    xpu::ctx_guard RAII_GUARD(ctx->x_context());
+    float* x_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_cast_to_fp32);
+    float* y_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(y_cast_to_fp32);
+    r = xpu::cast<XPUType, float>(ctx->x_context(), x, x_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::cast<XPUType, float>(ctx->x_context(), y, y_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::add<float>(ctx->x_context(),
+                        x_cast_to_fp32,
+                        y_cast_to_fp32,
+                        y_cast_to_fp32,
+                        numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+    r = xpu::cast<float, XPUType>(ctx->x_context(), y_cast_to_fp32, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+  } else {
+    r = xpu::add<XPUType>(ctx->x_context(), x, y, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+  }
 }
 #endif

@@ -286,6 +304,8 @@ void TensorAdd(const VarType& src, VarType* dst) {
    } else if (data_type ==
               framework::DataTypeTrait<platform::float16>::DataType()) {
      XPUTensorAddFunctor<platform::float16>(place, src_tensor, dst_tensor);
+    } else if (data_type == framework::DataTypeTrait<double>::DataType()) {
+      XPUTensorAddFunctor<double>(place, src_tensor, dst_tensor);
    } else {
      PADDLE_THROW(platform::errors::Unimplemented(
          "Gradient accumulation of data type (%s) on place (%s) is not "

--- a/paddle/fluid/platform/device/xpu/xpu2_op_list.h
+++ b/paddle/fluid/platform/device/xpu/xpu2_op_list.h
@@ -103,7 +103,9 @@ XPUOpMap& get_kl2_ops() {
      {"cast",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                     pOpKernelType(vartype::FP16, XPUPlace()),
+                     pOpKernelType(vartype::FP64, XPUPlace()),
                     pOpKernelType(vartype::BOOL, XPUPlace()),
+                     pOpKernelType(vartype::UINT8, XPUPlace()),
                     pOpKernelType(vartype::INT64, XPUPlace()),
                     pOpKernelType(vartype::INT32, XPUPlace())})},
      {"check_finite_and_unscale",

--- a/paddle/phi/kernels/xpu/cast_kernel.cc
+++ b/paddle/phi/kernels/xpu/cast_kernel.cc
@@ -14,6 +14,7 @@

 #include "paddle/phi/kernels/cast_kernel.h"

+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/enforce.h"
@@ -80,16 +81,19 @@ void CastKernel(const Context& dev_ctx,
          dev_ctx.template Alloc<uint8_t>(out),
          numel);
      break;
+    case phi::DataType::FLOAT64:
+      r = xpu::cast_v2<XPUInTDType, double>(
+          dev_ctx.x_context(),
+          reinterpret_cast<const XPUInTDType*>(in_data),
+          dev_ctx.template Alloc<double>(out),
+          numel);
+      break;
    default:
      PADDLE_THROW(phi::errors::Unavailable(
          "Not supported cast %d -> %d", x.dtype(), out_dtype));
  }

-  PADDLE_ENFORCE_EQ(
-      r,
-      XPU_SUCCESS,
-      phi::errors::External(
-          "XPU CAST API return wrong value[%d %s].", r, XPUAPIErrorMsg[r]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
 }
 }  // namespace phi

@@ -101,6 +105,8 @@ PD_REGISTER_KERNEL(cast,
                   float,
                   phi::dtype::float16,
                   int64_t,
-                   bool) {
+                   bool,
+                   uint8_t,
+                   double) {
  kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
 }
--- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
@@ -18,7 +18,6 @@ import sys

 sys.path.append("..")

-from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
 from op_test_xpu import XPUOpTest
@@ -88,7 +87,7 @@ class XPUTestAdadelta(XPUOpTestWrapper):
        def test_check_output(self):
            self.check_output()

-    class TestAdadeltaOp2(OpTest):
+    class TestAdadeltaOp2(XPUOpTest):
        '''Test Adadelta op with default attribute values'''

        def setUp(self):

--- a/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_cast_op_xpu.py
@@ -36,6 +36,7 @@ typeid_dict = {
    'float16': int(core.VarDesc.VarType.FP16),
    'bool': int(core.VarDesc.VarType.BOOL),
    'uint8': int(core.VarDesc.VarType.UINT8),
+    'float64': int(core.VarDesc.VarType.FP64),
 }


@@ -47,7 +48,15 @@ class XPUTestCastOp(XPUOpTestWrapper):
    def dynamic_create_class(self):
        base_class = self.TestCastOp
        classes = []
-        for out_type in {'float16', 'float32', 'int32', 'int64', 'uint8'}:
+        for out_type in {
+            'float16',
+            'float32',
+            'int32',
+            'int64',
+            'uint8',
+            'bool',
+            'float64',
+        }:
            class_name = 'XPUTestCastOp_outtype_' + out_type
            attr_dict = {'out_typename': out_type}
            classes.append([class_name, attr_dict])