Unverified commit 982d5ff7, authored by zhangyikun02, committed by GitHub

cast and gradient_accumulator support double for xpu, test=kunlun (#47800)

Parent 635958d9
@@ -31,6 +31,7 @@
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "xpu/refactor/math.h"
#endif
#ifdef PADDLE_WITH_ASCEND_CL
@@ -92,13 +93,30 @@ void XPUTensorAddFunctor(const platform::Place& place,
platform::DeviceContextPool::Instance().Get(place));
const XPUType* x = reinterpret_cast<const XPUType*>(src.data<T>());
XPUType* y = reinterpret_cast<XPUType*>(dst->mutable_data<T>(place));
int r = xpu::add<XPUType>(
ctx->x_context(), x, y, y, static_cast<int>(src.numel()));
PADDLE_ENFORCE_EQ(
r,
XPU_SUCCESS,
platform::errors::External(
"XPU add kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
int r = -1;
int numel = static_cast<int>(src.numel());
if (std::is_same<T, double>::value) {
xpu::ctx_guard RAII_GUARD(ctx->x_context());
float* x_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
PADDLE_ENFORCE_XDNN_NOT_NULL(x_cast_to_fp32);
float* y_cast_to_fp32 = RAII_GUARD.alloc<float>(numel);
PADDLE_ENFORCE_XDNN_NOT_NULL(y_cast_to_fp32);
r = xpu::cast<XPUType, float>(ctx->x_context(), x, x_cast_to_fp32, numel);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
r = xpu::cast<XPUType, float>(ctx->x_context(), y, y_cast_to_fp32, numel);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
r = xpu::add<float>(ctx->x_context(),
x_cast_to_fp32,
y_cast_to_fp32,
y_cast_to_fp32,
numel);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
r = xpu::cast<float, XPUType>(ctx->x_context(), y_cast_to_fp32, y, numel);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast");
} else {
r = xpu::add<XPUType>(ctx->x_context(), x, y, y, numel);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
}
}
#endif
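
The double branch above round-trips through float32, presumably because the xpu::add primitive has no double instantiation, so the accumulation itself happens at float32 precision. A minimal NumPy sketch of that behavior (illustration only, not part of the patch):

import numpy as np

def double_add_via_fp32(x, y):
    # mirrors the functor: cast(double -> float), add in float, cast(float -> double)
    return (x.astype(np.float32) + y.astype(np.float32)).astype(np.float64)

x = np.array([1.0], dtype=np.float64)
y = np.array([1e-9], dtype=np.float64)
print(double_add_via_fp32(x, y)[0])  # 1.0 -- the 1e-9 increment is lost at fp32
print((x + y)[0])                    # 1.000000001 -- exact float64 sum
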
@@ -286,6 +304,8 @@ void TensorAdd(const VarType& src, VarType* dst) {
} else if (data_type ==
framework::DataTypeTrait<platform::float16>::DataType()) {
XPUTensorAddFunctor<platform::float16>(place, src_tensor, dst_tensor);
} else if (data_type == framework::DataTypeTrait<double>::DataType()) {
XPUTensorAddFunctor<double>(place, src_tensor, dst_tensor);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Gradient accumulation of data type (%s) on place (%s) is not "
......
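
With the new dispatch branch, float64 gradient accumulation on an XPU place no longer falls through to the Unimplemented error. A hedged usage sketch, not taken from the patch, assuming a PaddlePaddle build with XPU support and a visible Kunlun device:

import paddle

paddle.set_device("xpu")
x = paddle.to_tensor([1.0, 2.0, 3.0], dtype="float64", stop_gradient=False)
loss = (x * x).sum() + (3.0 * x).sum()  # x feeds two branches, so its gradients are accumulated
loss.backward()
print(x.grad)  # float64 gradient accumulated via XPUTensorAddFunctor<double>
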
@@ -103,7 +103,9 @@ XPUOpMap& get_kl2_ops() {
{"cast",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace()),
pOpKernelType(vartype::FP64, XPUPlace()),
pOpKernelType(vartype::BOOL, XPUPlace()),
pOpKernelType(vartype::UINT8, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace())})},
{"check_finite_and_unscale",
......
@@ -14,6 +14,7 @@
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/enforce.h"
@@ -80,16 +81,19 @@ void CastKernel(const Context& dev_ctx,
dev_ctx.template Alloc<uint8_t>(out),
numel);
break;
case phi::DataType::FLOAT64:
r = xpu::cast_v2<XPUInTDType, double>(
dev_ctx.x_context(),
reinterpret_cast<const XPUInTDType*>(in_data),
dev_ctx.template Alloc<double>(out),
numel);
break;
default:
PADDLE_THROW(phi::errors::Unavailable(
"Not supported cast %d -> %d", x.dtype(), out_dtype));
}
PADDLE_ENFORCE_EQ(
r,
XPU_SUCCESS,
phi::errors::External(
"XPU CAST API return wrong value[%d %s].", r, XPUAPIErrorMsg[r]));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
}
} // namespace phi
@@ -101,6 +105,8 @@ PD_REGISTER_KERNEL(cast,
float,
phi::dtype::float16,
int64_t,
bool) {
bool,
uint8_t,
double) {
kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
}
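
With FLOAT64 added to the switch and double (plus bool and uint8_t) added to the kernel registration, paddle.cast can now produce and consume float64 tensors on XPU. A minimal sketch, not part of the patch, assuming an XPU device is available:

import paddle

paddle.set_device("xpu")
x = paddle.to_tensor([0.5, 1.5], dtype="float32")
y = paddle.cast(x, "float64")  # fp32 -> fp64: exercises the new FLOAT64 output case
z = paddle.cast(y, "float16")  # fp64 input: handled by the new double registration
print(y.dtype, z.dtype)
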
@@ -18,7 +18,6 @@ import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from op_test_xpu import XPUOpTest
@@ -88,7 +87,7 @@ class XPUTestAdadelta(XPUOpTestWrapper):
def test_check_output(self):
self.check_output()
class TestAdadeltaOp2(OpTest):
class TestAdadeltaOp2(XPUOpTest):
'''Test Adadelta op with default attribute values'''
def setUp(self):
......
@@ -36,6 +36,7 @@ typeid_dict = {
'float16': int(core.VarDesc.VarType.FP16),
'bool': int(core.VarDesc.VarType.BOOL),
'uint8': int(core.VarDesc.VarType.UINT8),
'float64': int(core.VarDesc.VarType.FP64),
}
@@ -47,7 +48,15 @@ class XPUTestCastOp(XPUOpTestWrapper):
def dynamic_create_class(self):
base_class = self.TestCastOp
classes = []
for out_type in {'float16', 'float32', 'int32', 'int64', 'uint8'}:
for out_type in {
'float16',
'float32',
'int32',
'int64',
'uint8',
'bool',
'float64',
}:
class_name = 'XPUTestCastOp_outtype_' + out_type
attr_dict = {'out_typename': out_type}
classes.append([class_name, attr_dict])
......
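
For reference, dynamic_create_class generates one subclass of TestCastOp per output dtype; the resulting class names follow the pattern in the loop above, sketched here:

for out_type in ['float16', 'float32', 'int32', 'int64', 'uint8', 'bool', 'float64']:
    print('XPUTestCastOp_outtype_' + out_type)
# e.g. XPUTestCastOp_outtype_float64 covers the newly added double cast kernel
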