Unverified · Commit 5c1babde · authored by xiaoting, committed by GitHub

[cherry-pick] polish kunlun error message for 2.0 rc (#28048)

* polish error message,test=kunlun

* polish error,test=kunlun

* polish error,test=kunlun

* polish error,test=kunlun
Parent 3f565903
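The change is the same throughout: each blanket InvalidArgument("XPU kernel error!") check is replaced with a message that names the failing kernel, embeds the raw return code, and tells the user what to check. A minimal standalone sketch of the resulting idiom, where CHECK_XPU_OK, kXpuSuccess, and fake_xpu_call are hypothetical stand-ins for PADDLE_ENFORCE_EQ, XPU_SUCCESS, and the real xpu:: calls:

    #include <cstdio>
    #include <cstdlib>

    constexpr int kXpuSuccess = 0;  // plays the role of XPU_SUCCESS

    // Pretend runtime call that reports failure through its return code.
    int fake_xpu_call() { return -1; }

    // New style: keep the raw return code so the message can embed it,
    // instead of collapsing the check to an uninformative boolean.
    #define CHECK_XPU_OK(call)                                               \
      do {                                                                   \
        int r_ = (call);                                                     \
        if (r_ != kXpuSuccess) {                                             \
          std::fprintf(stderr,                                               \
                       "XPU kernel return wrong value[%d], please check "    \
                       "whether Baidu Kunlun Card is properly installed.\n", \
                       r_);                                                  \
          std::abort();                                                      \
        }                                                                    \
      } while (0)

    int main() {
      CHECK_XPU_OK(fake_xpu_call());  // prints the code (-1) before aborting
    }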
@@ -50,11 +50,17 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         xpu::findmax(dev_ctx.x_context(), input->data<T>(), input->numel(),
                      max_input->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true, platform::errors::InvalidArgument(
+                  "XPU conv kernel error, can not find max_input, please "
+                  "check whether Baidu Kunlun "
+                  "Card is properly installed."));
     PADDLE_ENFORCE_EQ(
         xpu::findmax(dev_ctx.x_context(), filter.data<T>(), filter.numel(),
                      max_filter->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true, platform::errors::InvalidArgument(
+                  "XPU conv kernel error, can not find max_filter, please "
+                  "check whether Baidu Kunlun "
+                  "Card is properly installed."));
     if (groups == 1) {
       int r = xpu::conv2d_forward_int16<float, float, float, float>(
           dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
@@ -63,8 +69,12 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
           output->data<float>(), nullptr, nullptr, xpu::Activation_t::LINEAR,
           // nullptr, nullptr);
           max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel return wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     } else {
       int r = xpu::conv2d_int16_with_group<float, float, float>(
           dev_ctx.x_context(), input->data<float>(), filter.data<float>(),
@@ -72,8 +82,12 @@ class GemmConvXPUKernel : public framework::OpKernel<T> {
           win_w, groups, strides[0], strides[1], paddings[0], paddings[1],
           // nullptr, nullptr);
           max_input->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel return wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
   }
 };
@@ -125,7 +139,11 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
         xpu::findmax(dev_ctx.x_context(), output_grad->data<T>(),
                      output_grad->numel(),
                      max_output_grad->data<T>()) == xpu::Error_t::SUCCESS,
-        true, platform::errors::InvalidArgument("XPU kernel error!"));
+        true,
+        platform::errors::External(
+            "XPU conv kernel error, can not find max_output_grad, please check "
+            "whether Baidu Kunlun Card is "
+            "properly installed."));
     if (input_grad) {
       int r = xpu::conv2d_backward_int16(
           dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w,
@@ -134,8 +152,12 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
           filter.data<float>(), input_grad->data<float>(),
           // nullptr, nullptr,
           max_output_grad->data<float>(), max_filter->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel return wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
     if (filter_grad) {
       int r = xpu::conv2d_backward_weight_int16(
@@ -145,8 +167,12 @@ class GemmConvGradXPUKernel : public framework::OpKernel<T> {
           input->data<float>(), filter_grad->data<float>(),
           // nullptr, nullptr,
           max_output_grad->data<float>(), max_input->data<float>());
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, XPU_SUCCESS,
+          platform::errors::External("XPU conv kernel return wrong value[%d], "
+                                     "please check whether Baidu Kunlun Card "
+                                     "is properly installed.",
+                                     r));
     }
   }
 };
......
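Besides the rewording, the conv hunks above also flip the assertion shape from PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, ...) to PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, ...). Handing both operands to the check lets a failure report the offending value instead of just "false"; a self-contained sketch of the difference (ExpectEq is a hypothetical helper, not a Paddle API):

    #include <iostream>

    // An equality check that receives both operands can print them on
    // failure; a boolean check could only say the condition was false.
    template <typename T>
    void ExpectEq(T actual, T expected, const char* what) {
      if (actual != expected) {
        std::cerr << what << ": expected " << expected
                  << ", got " << actual << "\n";
      }
    }

    int main() {
      int r = -7;                       // pretend XPU return code
      ExpectEq(r, 0, "conv2d status");  // message carries the actual code: -7
    }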
@@ -64,10 +64,16 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
           }
         }
       }
-      PADDLE_ENFORCE(
+      PADDLE_ENFORCE_EQ(
           xpu_malloc(reinterpret_cast<void**>(&mask_data_table),
-                     max_data_size * sizeof(float)) == xpu::Error_t::SUCCESS,
-          "XPU no enough memory");
+                     max_data_size * sizeof(float)),
+          XPU_SUCCESS,
+          platform::errors::ResourceExhausted(
+              "\n\nOut of memory error on XPU, cannot "
+              "allocate %s memory on XPU.\n\nPlease "
+              "check whether there is any other process "
+              "using XPU.\n",
+              string::HumanReadableSize(max_data_size * sizeof(float))));
       memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
                    mask_data_table, platform::CPUPlace(), mask_data_host,
                    max_data_size * sizeof(float));
@@ -84,8 +90,12 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
       auto& dev_ctx = context.template device_context<DeviceContext>();
       int r = xpu::dropout(dev_ctx.x_context(), mask_data_table, x_data,
                            mask_data, y_data, max_data_size, size);
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, xpu::Error_t::SUCCESS,
+          platform::errors::External(
+              "XPU dropout return wrong value[%d], please check whether "
+              "Baidu Kunlun Card is properly installed.",
+              r));
     } else {  // Infer
       float scale = 0.0f;
       if (dropout_implementation == "upscale_in_train") {
@@ -96,8 +106,12 @@ class DropoutXPUKernel : public framework::OpKernel<T> {
       auto& dev_ctx = context.template device_context<DeviceContext>();
       int r = xpu::scale(dev_ctx.x_context(), x->numel(), scale, 0.0f, 0,
                          x_data, y_data);
-      PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                        platform::errors::InvalidArgument("XPU kernel error!"));
+      PADDLE_ENFORCE_EQ(
+          r, xpu::Error_t::SUCCESS,
+          platform::errors::External(
+              "XPU dropout return wrong value[%d], please check whether "
+              "Baidu Kunlun Card is properly installed.",
+              r));
     }
   }
 };
@@ -105,8 +119,9 @@ template <typename DeviceContext, typename T>
 class DropoutGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    PADDLE_ENFORCE(!context.Attr<bool>("is_test"),
-                   "GradOp is only callable when is_test is false");
+    PADDLE_ENFORCE_EQ(!context.Attr<bool>("is_test"), true,
+                      platform::errors::InvalidArgument(
+                          "GradOp is only callable when is_test is false"));
     auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
     auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* mask = context.Input<Tensor>("Mask");
@@ -115,8 +130,12 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
     int r = xpu::elementwise_mul(dev_ctx.x_context(), grad_y->data<T>(),
                                  mask->data<T>(), grad_x->data<T>(),
                                  grad_y->numel());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU dropout return wrong value[%d], please check whether "
+            "Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 }  // namespace operators
......
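The new out-of-memory message in the dropout hunk formats the requested byte count with string::HumanReadableSize. Assuming that helper scales bytes into the largest fitting unit, as in the sketch below (an illustration, not Paddle's implementation), the %s placeholder receives something like "12.0MB" rather than a raw byte count:

    #include <cstdio>
    #include <string>

    // Assumed behavior of a HumanReadableSize-style helper: divide the raw
    // byte count by 1024 until it fits the largest unit >= 1.
    std::string HumanReadableSize(double bytes) {
      const char* units[] = {"B", "KB", "MB", "GB", "TB"};
      int i = 0;
      while (bytes >= 1024.0 && i < 4) {
        bytes /= 1024.0;
        ++i;
      }
      char buf[32];
      std::snprintf(buf, sizeof(buf), "%.1f%s", bytes, units[i]);
      return buf;
    }

    int main() {
      // 3 * 1024 * 1024 floats of 4 bytes each -> "12.0MB"
      std::printf("%s\n",
                  HumanReadableSize(3 * 1024 * 1024 * sizeof(float)).c_str());
    }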
@@ -29,8 +29,12 @@ class LogLossXPUKernel : public framework::OpKernel<T> {
     int r =
         xpu::log_loss_fwd(dev_ctx.x_context(), n, epsilon, predict->data<T>(),
                           labels->data<T>(), loss->data<T>());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU log_loss kernel return wrong value[%d], please check whether "
+            "Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 template <typename DeviceContext, typename T, typename AttrType = T>
@@ -51,8 +55,12 @@ class LogLossGradXPUKernel : public framework::OpKernel<T> {
     int r = xpu::log_loss_bwd(dev_ctx.x_context(), n, epsilon,
                               predict->data<T>(), labels->data<T>(),
                               dloss->data<T>(), dpred->data<T>());
-    PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true,
-                      platform::errors::InvalidArgument("XPU kernel error!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "XPU log_loss kernel return wrong value[%d], please check whether "
+            "Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
......