Unverified commit 982d5ff7, authored by zhangyikun02, committed by GitHub

cast and gradient_accumulator support double for xpu, test=kunlun (#47800)

Parent 635958d9
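Summary of the change set below: FP64 (double) support is extended on the XPU (Kunlun) backend. The imperative gradient accumulator gains a double path in XPUTensorAddFunctor, the phi cast kernel gains a FLOAT64 output case and is additionally registered for uint8_t and double inputs, the KL2 op list advertises FP64 and UINT8 for the cast op, and the XPU unit tests are updated (the cast test now also covers bool and float64 outputs; one Adadelta test is switched from OpTest to XPUOpTest).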
@@ -31,6 +31,7 @@
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "xpu/refactor/math.h"
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
@@ -92,13 +93,30 @@ void XPUTensorAddFunctor(const platform::Place& place,
       platform::DeviceContextPool::Instance().Get(place));
   const XPUType* x = reinterpret_cast<const XPUType*>(src.data<T>());
   XPUType* y = reinterpret_cast<XPUType*>(dst->mutable_data<T>(place));
-  int r = xpu::add<XPUType>(
-      ctx->x_context(), x, y, y, static_cast<int>(src.numel()));
-  PADDLE_ENFORCE_EQ(
-      r,
-      XPU_SUCCESS,
-      platform::errors::External(
-          "XPU add kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
+  int r = -1;
+  int numel = static_cast<int>(src.numel());
+  if (std::is_same<T, double>::value) {
+    xpu::ctx_guard RAII_GUARD(ctx->x_context());
+    float* x_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(x_cast_to_fp32);
+    float* y_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(y_cast_to_fp32);
+    r = xpu::cast<XPUType, float>(ctx->x_context(), x, x_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::cast<XPUType, float>(ctx->x_context(), y, y_cast_to_fp32, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+    r = xpu::add<float>(ctx->x_context(),
+                        x_cast_to_fp32,
+                        y_cast_to_fp32,
+                        y_cast_to_fp32,
+                        numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+    r = xpu::cast<float, XPUType>(ctx->x_context(), y_cast_to_fp32, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
+  } else {
+    r = xpu::add<XPUType>(ctx->x_context(), x, y, y, numel);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
+  }
 }
 #endif
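The new double branch does not call xpu::add with a double instantiation; it emulates FP64 accumulation by round-tripping through FP32 scratch buffers (cast both operands to float, add in float, cast the sum back), so the accumulation's precision is effectively capped at FP32. A minimal standalone sketch of that pattern follows. It is not part of this commit: it only assumes the xpu::cast / xpu::add / xpu::ctx_guard interfaces visible in the hunk above, and leaves the XPU context type as a template parameter instead of naming a concrete class.

#include "xpu/refactor/math.h"  // xpu::add / xpu::cast, as included above

// Sketch only: the same fp32 round-trip used by the double branch above.
// XPU_SUCCESS is the status code the removed PADDLE_ENFORCE_EQ compared against.
template <typename T, typename XPUContext>
int AddViaFp32(XPUContext* xctx, const T* x, T* y, int numel) {
  xpu::ctx_guard guard(xctx);             // scratch buffers are released when guard goes out of scope
  float* xf = guard.alloc<float>(numel);
  float* yf = guard.alloc<float>(numel);
  if (xf == nullptr || yf == nullptr) {
    return -1;                            // scratch allocation failed
  }
  int r = xpu::cast<T, float>(xctx, x, xf, numel);  // x -> fp32 (lossy for double)
  if (r != XPU_SUCCESS) return r;
  r = xpu::cast<T, float>(xctx, y, yf, numel);      // y -> fp32
  if (r != XPU_SUCCESS) return r;
  r = xpu::add<float>(xctx, xf, yf, yf, numel);     // yf = xf + yf
  if (r != XPU_SUCCESS) return r;
  return xpu::cast<float, T>(xctx, yf, y, numel);   // cast the fp32 sum back into y
}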
@@ -286,6 +304,8 @@ void TensorAdd(const VarType& src, VarType* dst) {
   } else if (data_type ==
              framework::DataTypeTrait<platform::float16>::DataType()) {
     XPUTensorAddFunctor<platform::float16>(place, src_tensor, dst_tensor);
+  } else if (data_type == framework::DataTypeTrait<double>::DataType()) {
+    XPUTensorAddFunctor<double>(place, src_tensor, dst_tensor);
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "Gradient accumulation of data type (%s) on place (%s) is not "
...
@@ -103,7 +103,9 @@ XPUOpMap& get_kl2_ops() {
     {"cast",
      XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
                    pOpKernelType(vartype::FP16, XPUPlace()),
+                   pOpKernelType(vartype::FP64, XPUPlace()),
                    pOpKernelType(vartype::BOOL, XPUPlace()),
+                   pOpKernelType(vartype::UINT8, XPUPlace()),
                    pOpKernelType(vartype::INT64, XPUPlace()),
                    pOpKernelType(vartype::INT32, XPUPlace())})},
     {"check_finite_and_unscale",
...
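The two new pOpKernelType entries extend the KL2 (XPU2) op list so that the cast op declares FP64 and UINT8 kernels on that device generation, matching the kernel registration in the cast kernel change below.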
@@ -14,6 +14,7 @@
 #include "paddle/phi/kernels/cast_kernel.h"
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/enforce.h"
@@ -80,16 +81,19 @@ void CastKernel(const Context& dev_ctx,
           dev_ctx.template Alloc<uint8_t>(out),
           numel);
       break;
+    case phi::DataType::FLOAT64:
+      r = xpu::cast_v2<XPUInTDType, double>(
+          dev_ctx.x_context(),
+          reinterpret_cast<const XPUInTDType*>(in_data),
+          dev_ctx.template Alloc<double>(out),
+          numel);
+      break;
     default:
       PADDLE_THROW(phi::errors::Unavailable(
           "Not supported cast %d -> %d", x.dtype(), out_dtype));
   }
-  PADDLE_ENFORCE_EQ(
-      r,
-      XPU_SUCCESS,
-      phi::errors::External(
-          "XPU CAST API return wrong value[%d %s].", r, XPUAPIErrorMsg[r]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
 }
 }  // namespace phi
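The hunk keeps the kernel's existing shape: one xpu::cast_v2<InT, OutT> instantiation per output dtype in the switch, and a single XDNN status check after it. A condensed sketch of that dispatch shape is shown below; it is not the actual kernel (which covers more dtypes and allocates the output through dev_ctx), and it only assumes the xpu::cast_v2 signature and phi::DataType enum used above.

// Sketch only: per-dtype dispatch followed by one status check at the call site.
template <typename InT, typename XPUContext>
int CastTo(XPUContext* xctx, const InT* in, void* out, int numel,
           phi::DataType out_dtype) {
  int r = -1;
  switch (out_dtype) {
    case phi::DataType::FLOAT64:
      r = xpu::cast_v2<InT, double>(xctx, in, static_cast<double*>(out), numel);
      break;
    case phi::DataType::FLOAT32:
      r = xpu::cast_v2<InT, float>(xctx, in, static_cast<float*>(out), numel);
      break;
    default:
      return -1;  // unsupported target in this sketch
  }
  return r;  // caller checks r, e.g. PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2")
}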
@@ -101,6 +105,8 @@ PD_REGISTER_KERNEL(cast,
                    float,
                    phi::dtype::float16,
                    int64_t,
-                   bool) {
+                   bool,
+                   uint8_t,
+                   double) {
   kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
 }
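Design note: the dtype list in PD_REGISTER_KERNEL enumerates the template instantiations of CastKernel, i.e. the input dtypes the XPU backend accepts, so adding uint8_t and double lets those types be cast from, while the new FLOAT64 case in the switch above lets double be cast to. The updated Python test below exercises the new targets by adding 'bool' and 'float64' to its out_type set.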
@@ -18,7 +18,6 @@ import sys
 sys.path.append("..")
-from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
 from op_test_xpu import XPUOpTest
@@ -88,7 +87,7 @@ class XPUTestAdadelta(XPUOpTestWrapper):
     def test_check_output(self):
         self.check_output()
-class TestAdadeltaOp2(OpTest):
+class TestAdadeltaOp2(XPUOpTest):
     '''Test Adadelta op with default attribute values'''
     def setUp(self):
...
@@ -36,6 +36,7 @@ typeid_dict = {
     'float16': int(core.VarDesc.VarType.FP16),
     'bool': int(core.VarDesc.VarType.BOOL),
     'uint8': int(core.VarDesc.VarType.UINT8),
+    'float64': int(core.VarDesc.VarType.FP64),
 }
@@ -47,7 +48,15 @@ class XPUTestCastOp(XPUOpTestWrapper):
     def dynamic_create_class(self):
         base_class = self.TestCastOp
         classes = []
-        for out_type in {'float16', 'float32', 'int32', 'int64', 'uint8'}:
+        for out_type in {
+            'float16',
+            'float32',
+            'int32',
+            'int64',
+            'uint8',
+            'bool',
+            'float64',
+        }:
             class_name = 'XPUTestCastOp_outtype_' + out_type
             attr_dict = {'out_typename': out_type}
             classes.append([class_name, attr_dict])
...