未验证 提交 91727ac8 编写于 作者: C Chengmo 提交者: GitHub

Fix xpu error message (#28061) (#28092)

* fix error message, test=kunlun

* fix, test=kunlun
上级 5c2852a3
...@@ -86,30 +86,46 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> { ...@@ -86,30 +86,46 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
int r = xpu::matrix_vector_add_grad( int r = xpu::matrix_vector_add_grad(
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(), dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n); dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
PADDLE_ENFORCE_EQ( if (r == xpu::Error_t::INVALID_PARAM) {
r, XPU_SUCCESS, PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External( platform::errors::InvalidArgument(
"XPU API return wrong value[%d], please check whether " "XPU kernel error of ElementWiseAddOp, error "
"Baidu Kunlun Card is properly installed.", "message: INVALID_PARAM, "
r)); "please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
return; return;
} }
if (dx == nullptr) { if (dx == nullptr) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)), xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory")); XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
} }
if (dy == nullptr) { if (dy == nullptr) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)), xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory")); XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
} else { } else {
if (len != n) { if (len != n) {
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data), PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
len * sizeof(float)), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External( XPU_SUCCESS, platform::errors::ResourceExhausted(
"XPU has no enough memory")); "XPU has no enough memory"));
} }
} }
...@@ -117,22 +133,50 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> { ...@@ -117,22 +133,50 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
int r = xpu::elementwise_add_grad( int r = xpu::elementwise_add_grad(
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/, dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len); dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
PADDLE_ENFORCE_EQ( if (r == xpu::Error_t::INVALID_PARAM) {
r, XPU_SUCCESS, PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External( platform::errors::InvalidArgument(
"XPU API return wrong value[%d], please check whether " "XPU kernel error of ElementWiseAddOp, error "
"Baidu Kunlun Card is properly installed.", "message: INVALID_PARAM, "
r)); "please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error message: "
"RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
if ((dy != nullptr) && (len != n)) { if ((dy != nullptr) && (len != n)) {
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n, r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
post, xpu::ElementwiseOp::ASSIGN); post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE_EQ( if (r == xpu::Error_t::INVALID_PARAM) {
r, XPU_SUCCESS, PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::External( platform::errors::InvalidArgument(
"XPU API return wrong value[%d], please check whether " "XPU kernel error of ElementWiseAddOp, error "
"Baidu Kunlun Card is properly installed.", "message: INVALID_PARAM, "
r)); "please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
dev_ctx.Wait(); dev_ctx.Wait();
xpu_free(dy_data); xpu_free(dy_data);
} }
......
...@@ -48,8 +48,24 @@ class MomentumOpXPUKernel : public framework::OpKernel<T> { ...@@ -48,8 +48,24 @@ class MomentumOpXPUKernel : public framework::OpKernel<T> {
dev_ctx.x_context(), param->data<float>(), velocity->data<float>(), dev_ctx.x_context(), param->data<float>(), velocity->data<float>(),
grad->data<float>(), lr, use_nesterov, mu, param_out->numel(), grad->data<float>(), lr, use_nesterov, mu, param_out->numel(),
param_out->data<float>(), velocity_out->data<float>()); param_out->data<float>(), velocity_out->data<float>());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, if (r == xpu::Error_t::INVALID_PARAM) {
platform::errors::PermissionDenied("XPU kernel error!")); PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of MomentumOp, error message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of MomentumOp, error message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of MomentumOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -56,9 +56,25 @@ class SGDOpXPUKernel : public framework::OpKernel<T> { ...@@ -56,9 +56,25 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
auto &dev_ctx = ctx.template device_context<DeviceContext>(); auto &dev_ctx = ctx.template device_context<DeviceContext>();
int r = xpu::sgd(dev_ctx.x_context(), sz, grad_data, param_data, lr, int r = xpu::sgd(dev_ctx.x_context(), sz, grad_data, param_data, lr,
out_data); out_data);
PADDLE_ENFORCE_EQ( if (r == xpu::Error_t::INVALID_PARAM) {
r, xpu::Error_t::SUCCESS, PADDLE_ENFORCE_EQ(
platform::errors::PermissionDenied("XPU kernel error!")); r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of SgdOp, error message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of SgdOp, error message: "
"RUNTIME_ERROR, please check whether Baidu "
"Kunlun Card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of SgdOp, error "
"message: NO_ENOUGH_WORKSPACE, XPU "
"has no enough memory."));
}
} else { } else {
PADDLE_ENFORCE_EQ(false, true, PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied( platform::errors::PermissionDenied(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册