From c1eed1fa2428335fe9d03a989e7312187dfbf988 Mon Sep 17 00:00:00 2001
From: Double_V
Date: Mon, 19 Oct 2020 10:56:00 +0800
Subject: [PATCH] error message opt for XPU, test=kunlun (#27972)

* add stack pool2d roi_align xpu op,test=kunlun

* error message opt, test=kunlun

* add xpu unittest,test=kunlun

* skip check grad,test=kunlun

* fix boostget , test=kunlun

* error message opt for XPU, test=kunlun
---
 paddle/fluid/operators/pool_op_xpu.cc      | 52 ++++++++++++++--------
 paddle/fluid/operators/roi_align_op_xpu.cc | 18 +++++---
 paddle/fluid/operators/stack_op_xpu.cc     | 21 ++++++---
 3 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/operators/pool_op_xpu.cc b/paddle/fluid/operators/pool_op_xpu.cc
index 746f7054aa2..325b7359389 100644
--- a/paddle/fluid/operators/pool_op_xpu.cc
+++ b/paddle/fluid/operators/pool_op_xpu.cc
@@ -43,12 +43,14 @@ class PoolXPUKernel : public framework::OpKernel<T> {
     bool exclusive = context.Attr<bool>("exclusive");
     bool is_test = context.Attr<bool>("is_test");
     bool adaptive = context.Attr<bool>("adaptive");
-    PADDLE_ENFORCE_EQ(!adaptive, true,
-                      platform::errors::InvalidArgument(
-                          "XPU does not support adaptive == true!"));
-    PADDLE_ENFORCE_EQ(ksize.size(), 2,
-                      platform::errors::InvalidArgument(
-                          "XPU only support 2 dimension pooling!"));
+    PADDLE_ENFORCE_EQ(
+        !adaptive, true,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP does not support adaptive == true!"));
+    PADDLE_ENFORCE_EQ(
+        ksize.size(), 2,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP only supports 2-dimension pooling!"));
     int* index_data = nullptr;
     if (context.Attr<bool>("global_pooling")) {
       for (size_t i = 0; i < ksize.size(); ++i) {
@@ -80,7 +82,10 @@ class PoolXPUKernel : public framework::OpKernel<T> {
                           stride_w, out_h, out_w);
     PADDLE_ENFORCE_EQ(
         r, xpu::Error_t::SUCCESS,
-        platform::errors::InvalidArgument("pool2d XPU kernel error!"));
+        platform::errors::External(
+            "The pool2d XPU API returns wrong value[%d], please check "
+            "whether the Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
 template <typename DeviceContext, typename T>
@@ -99,12 +104,15 @@ class PoolGradXPUKernel : public framework::OpKernel<T> {
     bool exclusive = context.Attr<bool>("exclusive");
     bool adaptive = context.Attr<bool>("adaptive");
     const int* index_data = nullptr;
-    PADDLE_ENFORCE_EQ(!adaptive, true,
-                      platform::errors::InvalidArgument(
-                          "XPU does not support adaptive == true!"));
-    PADDLE_ENFORCE_EQ(ksize.size(), 2,
-                      platform::errors::InvalidArgument(
-                          "XPU only support 2 dimension pooling!"));
+    PADDLE_ENFORCE_EQ(
+        !adaptive, true,
+        platform::errors::InvalidArgument(
+            "The Pool2d XPU OP does not support adaptive == true!"));
+    PADDLE_ENFORCE_EQ(ksize.size(), 2, platform::errors::InvalidArgument(
+                                           "The Pool2d XPU OP only supports "
+                                           "2-dimension pooling, but received "
+                                           "%d-dimension pool kernel size.",
+                                           ksize.size()));
     if (context.Attr<bool>("global_pooling")) {
       for (size_t i = 0; i < ksize.size(); ++i) {
         paddings[i] = 0;
@@ -139,16 +147,22 @@ class PoolGradXPUKernel : public framework::OpKernel<T> {
     int r = xpu::memset(dev_ctx.x_context(),
                         reinterpret_cast<void*>(input_grad), zero,
                         in_x_grad->numel() * sizeof(float));
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are pool2d grad XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The Pool2d XPU OP returns wrong value[%d], please check "
+            "whether the Baidu Kunlun Card is properly installed.",
+            r));
     r = xpu::pooling_backward(dev_ctx.x_context(), input, output,
                               index_data, output_grad, input_grad,
                               pool_type, c, in_h,
                               in_w, pad_left, pad_right, pad_up, pad_down,
                               win_h, win_w, stride_h, stride_w, out_h, out_w);
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are pool2d grad XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The Pool2d XPU OP returns wrong value[%d], please check "
+            "whether the Baidu Kunlun Card is properly installed.",
+            r));
   }
 };
diff --git a/paddle/fluid/operators/roi_align_op_xpu.cc b/paddle/fluid/operators/roi_align_op_xpu.cc
index 3647f3a9eb4..2c3bfdbc16b 100644
--- a/paddle/fluid/operators/roi_align_op_xpu.cc
+++ b/paddle/fluid/operators/roi_align_op_xpu.cc
@@ -44,11 +44,16 @@ class XPUROIAlignOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(
         rois_batch_size, batch_size,
         platform::errors::InvalidArgument(
-            "The rois_batch_size and imgs batch_size must be the same."));
+            "The rois_batch_size and the images' batch_size of the "
+            "roi_align_xpu OP must be the same. But received rois_batch_size "
+            "%d, batch_size %d.", rois_batch_size, batch_size));
     int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      platform::errors::InvalidArgument(
-                          "The rois_num from input and lod must be the same."));
+    PADDLE_ENFORCE_EQ(
+        rois_num, rois_num_with_lod,
+        platform::errors::InvalidArgument(
+            "The rois_num from input and from lod of the roi_align_xpu OP "
+            "must be the same. But received input rois_num %d, lod rois_num "
+            "%d.", rois_num, rois_num_with_lod));
     T* output_data = out->mutable_data<T>(ctx.GetPlace());
     const T* rois_data = rois->data<T>();
     for (int n = 0; n < rois_batch_size; n++) {
@@ -62,7 +67,10 @@ class XPUROIAlignOpKernel : public framework::OpKernel<T> {
             rois_lod[n] * channels * pooled_height * pooled_width);
     PADDLE_ENFORCE_EQ(
         r, xpu::Error_t::SUCCESS,
-        platform::errors::InvalidArgument("roi_align XPU kernel error!"));
+        platform::errors::External(
+            "The roi_align XPU OP returns wrong value[%d], please check "
+            "whether the Baidu Kunlun Card is properly installed.",
+            r));
     }
   }
 }
diff --git a/paddle/fluid/operators/stack_op_xpu.cc b/paddle/fluid/operators/stack_op_xpu.cc
index f847f19f830..175bb94c70b 100644
--- a/paddle/fluid/operators/stack_op_xpu.cc
+++ b/paddle/fluid/operators/stack_op_xpu.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/operators/stack_op.h"
+#include <string>
 #ifdef PADDLE_WITH_XPU
 
 namespace paddle {
@@ -45,8 +46,15 @@ class StackXPUKernel : public framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     void* x_datas_host = std::malloc(n * sizeof(void*));
     void* x_datas_device = nullptr;
-    PADDLE_ENFORCE(xpu_malloc(reinterpret_cast<void**>(&x_datas_device),
-                              n * sizeof(void*)) == XPU_SUCCESS);
+    PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void**>(&x_datas_device),
+                                 n * sizeof(void*)),
+                      XPU_SUCCESS,
+                      platform::errors::ResourceExhausted(
+                          "\n\nOut of memory error on XPU. Cannot "
+                          "allocate %s memory on XPU.\n\nPlease "
+                          "check whether there is any other process "
+                          "using XPU.\n",
+                          string::HumanReadableSize(n * sizeof(void*))));
     for (auto i = 0; i < n; ++i) {
       ((const void**)x_datas_host)[i] = x[i]->data<T>();
     }
@@ -55,9 +63,12 @@ class StackXPUKernel : public framework::OpKernel<T> {
                n * sizeof(void*));
     int r = xpu::stack_forward(dev_ctx.x_context(), pre, post, n,
                                x_datas_device, y_data);
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::InvalidArgument(
-                          "There are stack XPU kernel error raised!"));
+    PADDLE_ENFORCE_EQ(
+        r, xpu::Error_t::SUCCESS,
+        platform::errors::External(
+            "The stack XPU API returns wrong value[%d], please check "
+            "whether the Baidu Kunlun Card is properly installed.",
+            r));
     dev_ctx.Wait();
     std::free(x_datas_host);
     xpu_free(x_datas_device);
-- 
GitLab
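
The convention this patch applies is worth stating once: problems with user
input raise platform::errors::InvalidArgument and echo the values received,
while a non-zero status from an XPU runtime call raises
platform::errors::External and echoes the raw code. Below is a minimal
standalone sketch of that pattern, not code from the patch: DemoXPUCheck,
ndim, and status are illustrative names, and the snippet assumes it is
compiled inside the Paddle source tree so the headers resolve.

#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"

// Sketch of the two error classes used by the XPU kernels above.
void DemoXPUCheck(int ndim, int status) {
  // Invalid user input -> InvalidArgument, echoing the received value.
  PADDLE_ENFORCE_EQ(ndim, 2,
                    paddle::platform::errors::InvalidArgument(
                        "Only 2-dimension pooling is supported, but received "
                        "a %d-dimension pool kernel size.",
                        ndim));
  // Failed device call -> External, echoing the raw status code
  // (xpu::Error_t::SUCCESS is assumed to be 0 here).
  PADDLE_ENFORCE_EQ(status, 0,
                    paddle::platform::errors::External(
                        "The XPU API returns wrong value[%d], please check "
                        "whether the Baidu Kunlun Card is properly installed.",
                        status));
}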