Unverified commit 5c1babde, authored by xiaoting and committed by GitHub

[cherry-pick] polish kunlun error message for 2.0 rc (#28048)

* polish error message,test=kunlun

* polish error,test=kunlun

* polish error,test=kunlun

* polish error,test=kunlun
Parent 3f565903
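
The change applies one pattern throughout the Kunlun (XPU) kernels: instead of collapsing the runtime's status code into a boolean and raising a generic `InvalidArgument("XPU kernel error!")`, each call site now compares the return code directly and echoes it in a typed error. A condensed before/after of the pattern, distilled from the conv hunks below:

```cpp
// Before: the status comparison is folded into a boolean and the message
// carries no diagnostic detail.
int r = /* some xpu:: kernel call */ 0;
PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
                  platform::errors::InvalidArgument("XPU kernel error!"));

// After: the raw return code is compared directly and echoed in a typed
// External error, since the failure originates in the XPU runtime.
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
                  platform::errors::External(
                      "XPU conv kernel returned wrong value[%d], please check "
                      "whether Baidu Kunlun Card is properly installed.",
                      r));
```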
@@ -50,11 +50,17 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         xpu::findmax(dev_ctx.x_context(), input->data<T>(), input->numel(),
                      max_input->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true, platform::errors::InvalidArgument(
+                  "XPU conv kernel error, cannot find max_input, please "
+                  "check whether Baidu Kunlun "
+                  "Card is properly installed."));
     PADDLE_ENFORCE_EQ(
         xpu::findmax(dev_ctx.x_context(), filter.data<T>(), filter.numel(),
                      max_filter->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true, platform::errors::InvalidArgument(
+                  "XPU conv kernel error, cannot find max_filter, please "
+                  "check whether Baidu Kunlun "
+                  "Card is properly installed."));
     if (groups == 1) {
       int r = xpu::conv2d_forward_int16<float, float, float, float>(
           dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
@@ -63,8 +69,12 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
           output->data<float>(), nullptr, nullptr, xpu::Activation_t::LINEAR,
           // nullptr, nullptr);
           max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel returned wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     } else {
       int r = xpu::conv2d_int16_with_group<float, float, float>(
           dev_ctx.x_context(), input->data<float>(), filter.data<float>(),
@@ -72,8 +82,12 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
           win_w, groups, strides[0], strides[1], paddings[0], paddings[1],
           // nullptr, nullptr);
           max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel returned wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
   }
 };
@@ -125,7 +139,11 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
         xpu::findmax(dev_ctx.x_context(), output_grad->data<T>(),
                      output_grad->numel(),
                      max_output_grad->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true,
+        platform::errors::External(
+            "XPU conv kernel error, cannot find max_output_grad, please check "
+            "whether Baidu Kunlun Card is "
+            "properly installed."));
     if (input_grad) {
       int r = xpu::conv2d_backward_int16(
           dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
@@ -134,8 +152,12 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
           filter.data<float>(), input_grad->data<float>(),
           // nullptr, nullptr,
           max_output_grad->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel returned wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
     if (filter_grad) {
       int r = xpu::conv2d_backward_weight_int16(
@@ -145,8 +167,12 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
           input->data<float>(), filter_grad->data<float>(),
           // nullptr, nullptr,
           max_output_grad->data<float>(), max_input->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel returned wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
   }
 };
@@ -64,10 +64,16 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
         }
       }
     }
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_EQ(
         xpu_malloc(reinterpret_cast<void**>(&mask_data_table),
-                   max_data_size * sizeof(float)) == xpu::Error_t::SUCCESS,
-        "XPU no enough memory");
+                   max_data_size * sizeof(float)),
+        XPU_SUCCESS,
+        platform::errors::ResourceExhausted(
+            "\n\nOut of memory error on XPU, cannot "
+            "allocate %s memory on XPU.\n\nPlease "
+            "check whether there is any other process "
+            "using XPU.\n",
+            string::HumanReadableSize(max_data_size * sizeof(float))));
     memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
                  mask_data_table, platform::CPUPlace(), mask_data_host,
                  max_data_size * sizeof(float));
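
Allocation failures get their own category, `ResourceExhausted`, with a human-readable size in the message. A minimal sketch of the same pattern as a reusable wrapper (the function name and template are invented here, assuming the `xpu_malloc`/`XPU_SUCCESS` API used in the hunk above):

```cpp
// Hypothetical wrapper, not in the commit: allocates n elements of T on
// the XPU and converts a failure into the same ResourceExhausted error
// that the dropout kernel now reports.
template <typename T>
T* CheckedXPUMalloc(size_t n) {
  void* ptr = nullptr;
  PADDLE_ENFORCE_EQ(
      xpu_malloc(&ptr, n * sizeof(T)), XPU_SUCCESS,
      platform::errors::ResourceExhausted(
          "Out of memory error on XPU, cannot allocate %s memory on XPU. "
          "Please check whether there is any other process using XPU.",
          string::HumanReadableSize(n * sizeof(T))));
  return static_cast<T*>(ptr);
}
```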
@@ -84,8 +90,12 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
       auto& dev_ctx = context.template device_context<DeviceContext>();
       int r = xpu::dropout(dev_ctx.x_context(), mask_data_table, x_data,
                            mask_data, y_data, max_data_size, size);
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, xpu::Error_t::SUCCESS,
+          platform::errors::External(
+              "XPU dropout returned wrong value[%d], please check whether "
+              "Baidu Kunlun Card is properly installed.",
+              r));
     } else {  // Infer
       float scale = 0.0f;
       if (dropout_implementation == "upscale_in_train") {
@@ -96,8 +106,12 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
       auto& dev_ctx = context.template device_context<DeviceContext>();
       int r = xpu::scale(dev_ctx.x_context(), x->numel(), scale, 0.0f, 0,
                          x_data, y_data);
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, xpu::Error_t::SUCCESS,
+          platform::errors::External(
+              "XPU dropout returned wrong value[%d], please check whether "
+              "Baidu Kunlun Card is properly installed.",
+              r));
     }
   }
 };
@@ -105,8 +119,9 @@ template <typename DeviceContext, typename T>
 class DropoutGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE(!context.Attr<bool>("is_test"),
-                   "GradOp is only callable when is_test is false");
+    PADDLE_ENFORCE_EQ(!context.Attr<bool>("is_test"), true,
+                      platform::errors::InvalidArgument(
+                          "GradOp is only callable when is_test is false"));
     auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
     auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* mask = context.Input<Tensor>("Mask");
@@ -115,8 +130,12 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
     int r = xpu::elementwise_mul(dev_ctx.x_context(), grad_y->data<T>(),
                                  mask->data<T>(), grad_x->data<T>(),
                                  grad_y->numel());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU dropout returned wrong value[%d], please check whether "
+            "Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 }  // namespace operators
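
The dropout gradient kernel also modernizes a plain boolean check: the deprecated two-argument `PADDLE_ENFORCE(cond, msg)` form becomes an explicit `PADDLE_ENFORCE_EQ` against `true` with a typed `InvalidArgument` error, as in the `is_test` hunk above. An equivalent, slightly more direct spelling of that check would be:

```cpp
// Equivalent to the is_test check above: compare the attribute against
// false directly instead of negating it and comparing against true.
PADDLE_ENFORCE_EQ(context.Attr<bool>("is_test"), false,
                  platform::errors::InvalidArgument(
                      "GradOp is only callable when is_test is false"));
```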
@@ -29,8 +29,12 @@ class LogLossXPUKernel : public framework::OpKernel<T> {
     int r =
         xpu::log_loss_fwd(dev_ctx.x_context(), n, epsilon, predict->data<T>(),
                           labels->data<T>(), loss->data<T>());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU log_loss kernel returned wrong value[%d], please check "
+            "whether Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 template <typename DeviceContext, typename T, typename AttrType = T>
@@ -51,8 +55,12 @@ class LogLossGradXPUKernel : public framework::OpKernel<T> {
     int r = xpu::log_loss_bwd(dev_ctx.x_context(), n, epsilon,
                               predict->data<T>(), labels->data<T>(),
                               dloss->data<T>(), dpred->data<T>());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU log_loss kernel returned wrong value[%d], please check "
+            "whether Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
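
Stripped of the Paddle plumbing, the technique these hunks share is simple: never discard the vendor status code, and put it in the exception text so a bug report identifies the failure immediately. A self-contained illustration in plain C++ (no Paddle or XPU dependencies; all names are invented for the example):

```cpp
#include <sstream>
#include <stdexcept>
#include <string>

// Stand-in for the vendor's status codes.
enum XpuStatus { kSuccess = 0, kRuntimeError = 1, kNoDevice = 2 };

// Raises a descriptive error that embeds the raw status code, mirroring
// the PADDLE_ENFORCE_EQ(..., platform::errors::External(...)) pattern.
void EnforceXpuSuccess(int status, const std::string& kernel) {
  if (status == kSuccess) return;
  std::ostringstream msg;
  msg << kernel << " returned wrong value[" << status
      << "], please check whether Baidu Kunlun Card is properly installed.";
  throw std::runtime_error(msg.str());
}

int main() {
  EnforceXpuSuccess(kSuccess, "XPU conv kernel");   // passes silently
  EnforceXpuSuccess(kNoDevice, "XPU conv kernel");  // throws with code 2
}
```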