From 5c1babde566a2d08a79e3f9c7ab4d047422ad4a8 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Mon, 19 Oct 2020 13:14:22 +0800 Subject: [PATCH] [cherry-pick] polish kunlun error message for 2.0 rc (#28048) * polish error message,test=kunlun * polish error,test=kunlun * polish error,test=kunlun * polish error,test=kunlun --- paddle/fluid/operators/conv_op_xpu.cc | 48 +++++++++++++++++------ paddle/fluid/operators/dropout_op_xpu.cc | 41 +++++++++++++------ paddle/fluid/operators/log_loss_op_xpu.cc | 16 ++++++-- 3 files changed, 79 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/operators/conv_op_xpu.cc b/paddle/fluid/operators/conv_op_xpu.cc index 82efac62d97..21fe9ad9bab 100644 --- a/paddle/fluid/operators/conv_op_xpu.cc +++ b/paddle/fluid/operators/conv_op_xpu.cc @@ -50,11 +50,17 @@ class GemmConvXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( xpu::findmax(dev_ctx.x_context(), input->data(), input->numel(), max_input->data()) == xpu::Error_t::SUCCESS, - true, platform::errors::InvalidArgument("XPU kernel error!")); + true, platform::errors::InvalidArgument( + "XPU conv kernel error, can not find max_input, please " + "check whether Baidu Kunlun " + "Card is properly installed.")); PADDLE_ENFORCE_EQ( xpu::findmax(dev_ctx.x_context(), filter.data(), filter.numel(), max_filter->data()) == xpu::Error_t::SUCCESS, - true, platform::errors::InvalidArgument("XPU kernel error!")); + true, platform::errors::InvalidArgument( + "XPU conv kernel error, can not find max_filter, please " + "check whether Baidu Kunlun " + "Card is properly installed.")); if (groups == 1) { int r = xpu::conv2d_forward_int16( dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w, @@ -63,8 +69,12 @@ class GemmConvXPUKernel : public framework::OpKernel { output->data(), nullptr, nullptr, xpu::Activation_t::LINEAR, // nullptr, nullptr); max_input->data(), max_filter->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, 
true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU conv kernel return wrong value[%d], " + "please check whether Baidu Kunlun Card " + "is properly installed.", + r)); } else { int r = xpu::conv2d_int16_with_group( dev_ctx.x_context(), input->data(), filter.data(), @@ -72,8 +82,12 @@ class GemmConvXPUKernel : public framework::OpKernel { win_w, groups, strides[0], strides[1], paddings[0], paddings[1], // nullptr, nullptr); max_input->data(), max_filter->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU conv kernel return wrong value[%d], " + "please check whether Baidu Kunlun Card " + "is properly installed.", + r)); } } }; @@ -125,7 +139,11 @@ class GemmConvGradXPUKernel : public framework::OpKernel { xpu::findmax(dev_ctx.x_context(), output_grad->data(), output_grad->numel(), max_output_grad->data()) == xpu::Error_t::SUCCESS, - true, platform::errors::InvalidArgument("XPU kernel error!")); + true, + platform::errors::External( + "XPU conv kernel error, can not find max_output_grad, please check " + "whether Baidu Kunlun Card is " + "properly installed.")); if (input_grad) { int r = xpu::conv2d_backward_int16( dev_ctx.x_context(), batch_size, img_c, img_h, img_w, f, win_h, win_w, @@ -134,8 +152,12 @@ class GemmConvGradXPUKernel : public framework::OpKernel { filter.data(), input_grad->data(), // nullptr, nullptr, max_output_grad->data(), max_filter->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU conv kernel return wrong value[%d], " + "please check whether Baidu Kunlun Card " + "is properly installed.", + r)); } if (filter_grad) { int r = xpu::conv2d_backward_weight_int16( @@ -145,8 +167,12 
@@ class GemmConvGradXPUKernel : public framework::OpKernel { input->data(), filter_grad->data(), // nullptr, nullptr, max_output_grad->data(), max_input->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU conv kernel return wrong value[%d], " + "please check whether Baidu Kunlun Card " + "is properly installed.", + r)); } } }; diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 506239fd2bc..f5d831fa240 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -64,10 +64,16 @@ class DropoutXPUKernel : public framework::OpKernel { } } } - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( xpu_malloc(reinterpret_cast(&mask_data_table), - max_data_size * sizeof(float)) == xpu::Error_t::SUCCESS, - "XPU no enough memory"); + max_data_size * sizeof(float)), + XPU_SUCCESS, + platform::errors::ResourceExhausted( + "\n\nOut of memory error on XPU, Cannot " + "allocate %s memory on XPU. 
\n\nPlease " "check whether there is any other process " "using XPU.\n", string::HumanReadableSize(max_data_size * sizeof(float)))); memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), mask_data_table, platform::CPUPlace(), mask_data_host, max_data_size * sizeof(float)); @@ -84,8 +90,12 @@ class DropoutXPUKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); int r = xpu::dropout(dev_ctx.x_context(), mask_data_table, x_data, mask_data, y_data, max_data_size, size); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External( + "XPU dropout return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); } else { // Infer float scale = 0.0f; if (dropout_implementation == "upscale_in_train") { @@ -96,8 +106,12 @@ class DropoutXPUKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); int r = xpu::scale(dev_ctx.x_context(), x->numel(), scale, 0.0f, 0, x_data, y_data); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External( + "XPU dropout return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); } } }; @@ -105,8 +119,9 @@ template class DropoutGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE(!context.Attr("is_test"), - "GradOp is only callable when is_test is false"); + PADDLE_ENFORCE_EQ(!context.Attr("is_test"), true, + platform::errors::InvalidArgument( + "GradOp is only callable when is_test is false")); auto* grad_x = context.Output(framework::GradVarName("X")); auto* grad_y = context.Input(framework::GradVarName("Out")); auto* mask = 
context.Input("Mask"); @@ -115,8 +130,12 @@ class DropoutGradXPUKernel : public framework::OpKernel { int r = xpu::elementwise_mul(dev_ctx.x_context(), grad_y->data(), mask->data(), grad_x->data(), grad_y->numel()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External( + "XPU dropout return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); } }; } // namespace operators diff --git a/paddle/fluid/operators/log_loss_op_xpu.cc b/paddle/fluid/operators/log_loss_op_xpu.cc index 80e5f8ec401..b2e68e9870d 100644 --- a/paddle/fluid/operators/log_loss_op_xpu.cc +++ b/paddle/fluid/operators/log_loss_op_xpu.cc @@ -29,8 +29,12 @@ class LogLossXPUKernel : public framework::OpKernel { int r = xpu::log_loss_fwd(dev_ctx.x_context(), n, epsilon, predict->data(), labels->data(), loss->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External( + "XPU log_loss kernel return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); } }; template @@ -51,8 +55,12 @@ class LogLossGradXPUKernel : public framework::OpKernel { int r = xpu::log_loss_bwd(dev_ctx.x_context(), n, epsilon, predict->data(), labels->data(), dloss->data(), dpred->data()); - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::InvalidArgument("XPU kernel error!")); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External( + "XPU log_loss kernel return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); } }; -- GitLab