From 41917fb5f98921734a38feda90c5f85ec2f3465d Mon Sep 17 00:00:00 2001 From: QingshuChen Date: Fri, 25 Dec 2020 16:11:33 +0800 Subject: [PATCH] feat: support check_nan_inf for kunlun/xpu device (#29694) (#29898) * feat: support check_nan_inf for kunlun device * support kunlun stack * minor --- .../framework/details/nan_inf_utils_detail.cc | 27 +++++++++ paddle/fluid/operators/stack_op_xpu.cc | 60 +++++++------------ paddle/http.log | 0 .../tests/unittests/xpu/test_stack_op_xpu.py | 7 ++- 4 files changed, 53 insertions(+), 41 deletions(-) delete mode 100644 paddle/http.log diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index 797a254c951..776ed9ef8eb 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -333,6 +333,33 @@ void CheckVarHasNanOrInf(const std::string& op_type, PADDLE_THROW(platform::errors::PreconditionNotMet( "Tensor[%s] use gpu place. PaddlePaddle must compile with GPU.", var_name)); +#endif + return; + } else if (platform::is_xpu_place(tensor->place())) { +#ifdef PADDLE_WITH_XPU + if (tensor->type() != proto::VarType::FP32) { + return; + } + + float* cpu_data = new float[tensor->numel()]; + xpu_memcpy(cpu_data, tensor->data(), tensor->numel() * sizeof(float), + XPU_DEVICE_TO_HOST); + bool flag = false; + for (int i = 0; i < tensor->numel(); i++) { + if (isnan(cpu_data[i]) || isinf(cpu_data[i])) { + flag = true; + break; + } + } + delete[] cpu_data; + PADDLE_ENFORCE_NE( + flag, true, + platform::errors::Fatal("Operator %s output Tensor %s contains Inf.", + op_type, var_name)); +#else + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Tensor[%s] use xpu place. PaddlePaddle must compile with XPU.", + var_name)); #endif return; } diff --git a/paddle/fluid/operators/stack_op_xpu.cc b/paddle/fluid/operators/stack_op_xpu.cc index 175bb94c70b..9929df6e309 100644 --- a/paddle/fluid/operators/stack_op_xpu.cc +++ b/paddle/fluid/operators/stack_op_xpu.cc @@ -28,50 +28,34 @@ class StackXPUKernel : public framework::OpKernel { auto* y = ctx.Output("Y"); int axis = ctx.Attr("axis"); if (axis < 0) { - axis += (x[0]->dims().size() + 1); + axis += x[0]->dims().size() + 1; } - int n = static_cast(x.size()); - PADDLE_ENFORCE_LE(n, 24, - platform::errors::InvalidArgument( - "XPU only surpport at most 24 tensors for now")); auto* y_data = y->mutable_data(ctx.GetPlace()); - int pre = 1, post = 1; + auto& dim = x[0]->dims(); - for (auto i = 0; i < axis; ++i) { - pre *= dim[i]; + std::vector xdims; + for (auto i = 0; i < dim.size(); ++i) { + xdims.push_back(dim[i]); } - for (auto i = axis; i < dim.size(); ++i) { - post *= dim[i]; + xdims.push_back(1); + std::vector> xdims_list; + int n = static_cast(x.size()); + for (int i = 0; i < n; i++) { + xdims_list.push_back(xdims); } - auto& dev_ctx = ctx.template device_context(); - void* x_datas_host = std::malloc(n * sizeof(void*)); - void* x_datas_device = nullptr; - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&x_datas_device), - n * sizeof(void*)), - XPU_SUCCESS, - platform::errors::ResourceExhausted( - "\n\nOut of memory error on XPU, Cannot" - "allocate %s memory on XPU. \n\nPlease " - "check whether there is any other process " - "using XPU.\n", - string::HumanReadableSize(n * sizeof(void*)))); - for (auto i = 0; i < n; ++i) { - ((const void**)x_datas_host)[i] = x[i]->data(); + + std::vector x_list; + for (int i = 0; i < n; i++) { + x_list.push_back(x[i]->data()); } - memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()), - x_datas_device, platform::CPUPlace(), x_datas_host, - n * sizeof(void*)); - int r = xpu::stack_forward(dev_ctx.x_context(), pre, post, n, - x_datas_device, y_data); - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::External( - "The stack XPU API return wrong value[%d], please check " - "where Baidu Kunlun Card is properly installed.", - r)); - dev_ctx.Wait(); - std::free(x_datas_host); - xpu_free(x_datas_device); + + auto& dev_ctx = ctx.template device_context(); + int r = + xpu::concat(dev_ctx.x_context(), x_list, y_data, xdims_list, axis); + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::External( + "The stack XPU API return wrong value[%d %s]", r, + XPUAPIErrorMsg[r])); } }; diff --git a/paddle/http.log b/paddle/http.log deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py index 13de73fef6f..7c546391f6f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py @@ -19,18 +19,19 @@ import unittest import numpy as np import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest import paddle import paddle.fluid as fluid from paddle.fluid import Program, program_guard @skip_check_grad_ci(reason="There is no grad kernel for stack_xpu op.") -class TestStackOpBase(OpTest): +class TestStackOpBase(XPUOpTest): def initDefaultParameters(self): self.num_inputs = 4 self.input_dim = (5, 6, 7) self.axis = 0 - self.dtype = 'float64' + self.dtype = 'float32' def initParameters(self): pass @@ -73,7 +74,7 @@ class TestStackOp1(TestStackOpBase): class TestStackOp2(TestStackOpBase): def initParameters(self): - self.num_inputs = 20 + self.num_inputs = 30 class TestStackOp3(TestStackOpBase): -- GitLab