From a5aa4dc7a92b894050efbd46eb78ab5f938434dd Mon Sep 17 00:00:00 2001 From: taixiurong Date: Wed, 25 Nov 2020 15:37:46 +0800 Subject: [PATCH] add xpu elementwise ops (#29031) --- .../elementwise/elementwise_add_op_xpu.cc | 158 +----- .../elementwise/elementwise_div_op_xpu.cc | 16 +- .../elementwise_floordiv_op_xpu.cc | 37 ++ .../elementwise/elementwise_max_op_xpu.cc | 16 +- .../elementwise/elementwise_min_op_xpu.cc | 49 ++ .../elementwise/elementwise_mul_op_xpu.cc | 12 +- .../elementwise/elementwise_pow_op_xpu.cc | 40 ++ .../elementwise/elementwise_sub_op_xpu.cc | 17 +- .../operators/elementwise/elementwise_xpu.h | 471 ++++++++++-------- .../softmax_with_cross_entropy_op_xpu.cc | 66 ++- .../fluid/tests/unittests/xpu/elementwise.py | 100 ---- .../xpu/test_elementwise_add_op_xpu.py | 139 +++--- .../xpu/test_elementwise_div_op_xpu.py | 228 ++++++--- .../xpu/test_elementwise_floordiv_op_xpu.py | 87 ++++ .../xpu/test_elementwise_max_op_xpu.py | 180 ++++--- .../xpu/test_elementwise_min_op_xpu.py | 180 +++++++ .../xpu/test_elementwise_mul_op_xpu.py | 246 ++++++--- .../xpu/test_elementwise_pow_op_xpu.py | 182 +++++++ .../xpu/test_elementwise_sub_op_xpu.py | 191 +++++-- .../test_softmax_with_cross_entropy_op_xpu.py | 267 +++++----- 20 files changed, 1716 insertions(+), 966 deletions(-) create mode 100644 paddle/fluid/operators/elementwise/elementwise_floordiv_op_xpu.cc create mode 100644 paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc create mode 100644 paddle/fluid/operators/elementwise/elementwise_pow_op_xpu.cc delete mode 100644 python/paddle/fluid/tests/unittests/xpu/elementwise.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc index ad4a16c6e06..625e66d5f39 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc @@ -27,7 +27,7 @@ template class ElementwiseAddXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - XPUElementwise>(ctx); + XPUElementwise(ctx, xpu::add); } }; @@ -36,161 +36,7 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { ElemwiseGradKernel::Compute(ctx); - using Tensor = framework::Tensor; - - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *dy = ctx.Output(framework::GradVarName("Y")); - - auto dx_dims = dout->dims(); - auto dy_dims_untrimed = dout->dims(); - T *dx_data = NULL; - T *dy_data = NULL; - - int axis = ctx.Attr("axis"); - PADDLE_ENFORCE_GE(dx_dims.size(), dy_dims_untrimed.size(), - platform::errors::InvalidArgument( - "Rank of first input must >= rank of second input.")); - - if (dx != nullptr) { - dx->mutable_data(ctx.GetPlace()); - dx_dims = dx->dims(); - dx_data = dx->data(); - } - - if (dy != nullptr) { - dy->mutable_data(ctx.GetPlace()); - dy_dims_untrimed = dy->dims(); - dy_data = dy->data(); - } - - int pre, n, post, is_common_broadcast; - if (dx_dims == dy_dims_untrimed) { - pre = post = 1; - n = dout->numel(); - } else { - axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis); - PADDLE_ENFORCE_EQ(axis >= 0 && axis < dx_dims.size(), true, - platform::errors::InvalidArgument( - "Axis should be in range [0, dx_dims)")); - auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed); - axis = (dy_dims.size() == 0) ? dx_dims.size() : axis; - get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post, - &is_common_broadcast); - } - int len = pre * n * post; - - auto &dev_ctx = - ctx.template device_context(); - if (post == 1) { - int r = xpu::matrix_vector_add_grad( - dev_ctx.x_context(), dout->data(), dout->data(), - dout->data(), dout->data(), dx_data, dy_data, pre, n); - if (r == xpu::Error_t::INVALID_PARAM) { - PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::InvalidArgument( - "XPU kernel error of ElementWiseAddOp, error " - "message: INVALID_PARAM, " - "please check your input & output.")); - } else if (r == xpu::Error_t::RUNTIME_ERROR) { - PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::Unavailable( - "XPU kernel error of ElementWiseAddOp, error " - "message: RUNTIME_ERROR, " - "please check whether Baidu Kunlun card is " - "properly installed.")); - } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) { - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::ResourceExhausted( - "XPU kernel error of ElementWiseAddOp, error message: " - "NO_ENOUGH_WORKSPACE, XPU has no enough memory.")); - } - return; - } - - if (dx == nullptr) { - PADDLE_ENFORCE_EQ( - xpu_malloc(reinterpret_cast(&dx_data), len * sizeof(float)), - XPU_SUCCESS, - platform::errors::ResourceExhausted("XPU has no enough memory")); - } - - if (dy == nullptr) { - PADDLE_ENFORCE_EQ( - xpu_malloc(reinterpret_cast(&dy_data), len * sizeof(float)), - XPU_SUCCESS, - platform::errors::ResourceExhausted("XPU has no enough memory")); - } else { - if (len != n) { - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&dy_data), - len * sizeof(float)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); - } - } - - int r = xpu::elementwise_add_grad( - dev_ctx.x_context(), dout->data() /*x*/, dout->data() /*y*/, - dout->data() /*out*/, dout->data(), dx_data, dy_data, len); - if (r == xpu::Error_t::INVALID_PARAM) { - PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::InvalidArgument( - "XPU kernel error of ElementWiseAddOp, error " - "message: INVALID_PARAM, " - "please check your input & output.")); - } else if (r == xpu::Error_t::RUNTIME_ERROR) { - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::Unavailable( - "XPU kernel error of ElementWiseAddOp, error message: " - "RUNTIME_ERROR, " - "please check whether Baidu Kunlun card is properly installed.")); - } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) { - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::ResourceExhausted( - "XPU kernel error of ElementWiseAddOp, error message: " - "NO_ENOUGH_WORKSPACE, XPU has no enough memory.")); - } - - if ((dy != nullptr) && (len != n)) { - r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data(), pre, n, - post, xpu::ElementwiseOp::ASSIGN); - if (r == xpu::Error_t::INVALID_PARAM) { - PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::InvalidArgument( - "XPU kernel error of ElementWiseAddOp, error " - "message: INVALID_PARAM, " - "please check your input & output.")); - } else if (r == xpu::Error_t::RUNTIME_ERROR) { - PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, - platform::errors::Unavailable( - "XPU kernel error of ElementWiseAddOp, error " - "message: RUNTIME_ERROR, " - "please check whether Baidu Kunlun card is " - "properly installed.")); - } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) { - PADDLE_ENFORCE_EQ( - r, xpu::Error_t::SUCCESS, - platform::errors::ResourceExhausted( - "XPU kernel error of ElementWiseAddOp, error message: " - "NO_ENOUGH_WORKSPACE, XPU has no enough memory.")); - } - dev_ctx.Wait(); - xpu_free(dy_data); - } - - if ((dx == nullptr || dy == nullptr) && !(dy != nullptr && len != n)) { - dev_ctx.Wait(); - } - - if (dx == nullptr) { - xpu_free(dx_data); - } - if (dy == nullptr) { - xpu_free(dy_data); - } + XPUElementwiseGrad(ctx, xpu::add_grad, false); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc index 6cc42766800..4f254a53074 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_xpu.cc @@ -19,18 +19,19 @@ limitations under the License. */ namespace paddle { namespace operators { -template -struct XPUDivFunctor { - int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { - return xpu::elementwise_div(ctx, x, y, z, len); +template +class ElementwiseDivXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::div); } }; template -class ElementwiseDivXPUKernel : public framework::OpKernel { +class ElementwiseDivGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - XPUElementwise>(ctx); + XPUElementwiseGrad(ctx, xpu::div_grad, true); } }; @@ -40,4 +41,7 @@ namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL( elementwise_div, ops::ElementwiseDivXPUKernel); +REGISTER_OP_XPU_KERNEL(elementwise_div_grad, + ops::ElementwiseDivGradXPUKernel< + paddle::platform::XPUDeviceContext, float>); #endif diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_xpu.cc new file mode 100644 index 00000000000..32ae3a6f2c0 --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_xpu.cc @@ -0,0 +1,37 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/operators/elementwise/elementwise_div_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_xpu.h" +namespace paddle { +namespace operators { + +template +class ElementwiseFloordivXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::floordiv); + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; +REGISTER_OP_XPU_KERNEL(elementwise_floordiv, + ops::ElementwiseFloordivXPUKernel< + paddle::platform::XPUDeviceContext, float>); + +#endif diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc index 232cfa02397..411ddb26603 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc @@ -20,18 +20,19 @@ limitations under the License. */ namespace paddle { namespace operators { -template -struct XPUMaxFunctor { - int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { - return xpu::elementwise_max(ctx, x, y, z, len); +template +class ElementwiseMaxXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::max); } }; template -class ElementwiseMaxXPUKernel : public framework::OpKernel { +class ElementwiseMaxGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - XPUElementwise>(ctx); + XPUElementwiseGrad(ctx, xpu::max_grad, true); } }; @@ -42,4 +43,7 @@ namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL( elementwise_max, ops::ElementwiseMaxXPUKernel); +REGISTER_OP_XPU_KERNEL(elementwise_max_grad, + ops::ElementwiseMaxGradXPUKernel< + paddle::platform::XPUDeviceContext, float>); #endif diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc new file mode 100644 index 00000000000..0b1e1312264 --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU + +#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_xpu.h" +namespace paddle { +namespace operators { + +template +class ElementwiseMinXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::min); + } +}; + +template +class ElementwiseMinGradXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwiseGrad(ctx, xpu::min_grad, true); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_XPU_KERNEL( + elementwise_min, + ops::ElementwiseMinXPUKernel); +REGISTER_OP_XPU_KERNEL(elementwise_min_grad, + ops::ElementwiseMinGradXPUKernel< + paddle::platform::XPUDeviceContext, float>); +#endif diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc index d9a6ca844ae..02c6900c7c1 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_xpu.cc @@ -22,10 +22,18 @@ template class ElementwiseMulXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - XPUElementwise>(ctx); + XPUElementwise(ctx, xpu::mul); } }; -DEFINE_XPU_GRAD_KERNEL(Mul, mul, true); +// DEFINE_XPU_GRAD_KERNEL(Mul, mul, true); +template +class ElementwiseMulGradXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwiseGrad(ctx, xpu::mul_grad, true); + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op_xpu.cc new file mode 100644 index 00000000000..31b6ef9abce --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op_xpu.cc @@ -0,0 +1,40 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_sub_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_xpu.h" +#include "xpu/refactor/math.h" + +namespace paddle { +namespace operators { + +template +class ElementwisePowXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::pow); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_XPU_KERNEL( + elementwise_pow, + ops::ElementwisePowXPUKernel); + +#endif diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op_xpu.cc index 4e205fe4921..bef3a4904f4 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op_xpu.cc @@ -16,25 +16,28 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_xpu.h" +#include "xpu/refactor/math.h" + namespace paddle { namespace operators { -template -struct XPUSubFunctor { - int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { - return xpu::elementwise_sub(ctx, x, y, z, len); +template +class ElementwiseSubXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + XPUElementwise(ctx, xpu::sub); } }; template -class ElementwiseSubXPUKernel : public framework::OpKernel { +class ElementwiseSubGradXPUKernel : public ElemwiseGradKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - XPUElementwise>(ctx); + ElemwiseGradKernel::Compute(ctx); + XPUElementwiseGrad(ctx, xpu::sub_grad, false); } }; -DEFINE_XPU_GRAD_KERNEL(Sub, sub, false); } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h index 53f2cd2dccc..fdf5aeeba53 100644 --- a/paddle/fluid/operators/elementwise/elementwise_xpu.h +++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h @@ -13,175 +13,76 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once #ifdef PADDLE_WITH_XPU +#include #include -#include +#include +#include +#include #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/place.h" - -inline std::string get_xpu_error_message(int error_type) { - static std::unordered_map xpu_error_map = { - {baidu::xpu::api::INVALID_PARAM, "Parameter is invalid."}, - {baidu::xpu::api::RUNTIME_ERROR, - "Please check whether Baidu Kunlun Card " - "is properly installed."}, - {baidu::xpu::api::NO_ENOUGH_WORKSPACE, - "There is not enough memory in Baidu" - " Kunlun Card."}}; - if (xpu_error_map.find(error_type) == xpu_error_map.end()) { - return "Unknown error type!"; - } - return xpu_error_map[error_type]; -} - -#define XPU_MALLOC(addr, num_bytes) \ - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(addr), num_bytes), \ - XPU_SUCCESS, \ - platform::errors::ResourceExhausted( \ - "\n\nOut of memory error on XPU, Cannot" \ - "allocate %s memory on XPU. \n\nPlease " \ - "check whether there is any other process " \ - "using XPU.\n", \ - string::HumanReadableSize(num_bytes))) - -#define DEFINE_XPU_GRAD_KERNEL(kernel_type, kernel_name, use_x_y_data) \ - template \ - class Elementwise##kernel_type##GradXPUKernel \ - : public ElemwiseGradKernel { \ - public: \ - void Compute(const framework::ExecutionContext& ctx) const override { \ - ElemwiseGradKernel::Compute(ctx); \ - using Tensor = framework::Tensor; \ - auto* dout = ctx.Input(framework::GradVarName("Out")); \ - auto* dx = ctx.Output(framework::GradVarName("X")); \ - auto* dy = ctx.Output(framework::GradVarName("Y")); \ - auto dx_dims = dout->dims(); \ - auto dy_dims_untrimed = dout->dims(); \ - T* dx_data = NULL; \ - T* dy_data = NULL; \ - const T* y_data = nullptr; \ - const T* x_data = nullptr; \ - T* y_broadcast = nullptr; \ - if (use_x_y_data) { \ - auto* x = ctx.Input("X"); \ - auto* y = ctx.Input("Y"); \ - y_data = y->data(); \ - x_data = x->data(); \ - } else { \ - x_data = dout->data(); \ - y_data = dout->data(); \ - } \ - int axis = ctx.Attr("axis"); \ - PADDLE_ENFORCE_GE( \ - dx_dims.size(), dy_dims_untrimed.size(), \ - platform::errors::InvalidArgument( \ - "Rank of first input must >= rank of second input.")); \ - if (dx != nullptr) { \ - dx->mutable_data(ctx.GetPlace()); \ - dx_dims = dx->dims(); \ - dx_data = dx->data(); \ - } \ - if (dy != nullptr) { \ - dy->mutable_data(ctx.GetPlace()); \ - dy_dims_untrimed = dy->dims(); \ - dy_data = dy->data(); \ - } \ - int pre, n, post, is_run_common_broadcast; \ - if (dx_dims == dy_dims_untrimed) { \ - pre = post = 1; \ - n = dout->numel(); \ - } else { \ - axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis); \ - PADDLE_ENFORCE_EQ(axis >= 0 && axis < dx_dims.size(), true, \ - platform::errors::InvalidArgument( \ - "Axis should be in range [0, dx_dims)")); \ - auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed); \ - axis = (dy_dims.size() == 0) ? dx_dims.size() : axis; \ - get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post, \ - &is_run_common_broadcast); \ - } \ - int len = pre * n * post; \ - auto& dev_ctx = \ - ctx.template device_context(); \ - if (dx == nullptr) { \ - XPU_MALLOC(&dx_data, len * sizeof(float)); \ - } \ - if (dy == nullptr) { \ - XPU_MALLOC(&dy_data, len * sizeof(float)); \ - } else { \ - if (len != n) { \ - XPU_MALLOC(&dy_data, len * sizeof(float)); \ - } \ - } \ - if (use_x_y_data) { \ - if (len != n) { \ - XPU_MALLOC(&y_broadcast, len * sizeof(float)); \ - int res = \ - xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre, \ - n, post, xpu::ElementwiseOp::ASSIGN); \ - PADDLE_ENFORCE_EQ( \ - res, xpu::Error_t::SUCCESS, \ - platform::errors::External("XPU kernel error occur! %s", \ - get_xpu_error_message(res))); \ - y_data = y_broadcast; \ - } \ - } \ - int res = xpu::elementwise_##kernel_name##_grad( \ - dev_ctx.x_context(), x_data, y_data, dout->data() /*out*/, \ - dout->data(), dx_data, dy_data, len); \ - PADDLE_ENFORCE_EQ( \ - res, xpu::Error_t::SUCCESS, \ - platform::errors::External("XPU kernel error occur! %s", \ - get_xpu_error_message(res))); \ - if ((dy != nullptr) && (len != n)) { \ - int res = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data(), \ - pre, n, post, xpu::ElementwiseOp::ASSIGN); \ - PADDLE_ENFORCE_EQ( \ - res, xpu::Error_t::SUCCESS, \ - platform::errors::External("XPU kernel error occur! %s", \ - get_xpu_error_message(res))); \ - dev_ctx.Wait(); \ - xpu_free(dy_data); \ - } \ - if ((len != n || dx == nullptr || dy == nullptr) && \ - !(dy != nullptr && len != n)) { \ - dev_ctx.Wait(); \ - } \ - if (dx == nullptr) { \ - xpu_free(dx_data); \ - } \ - if (dy == nullptr) { \ - xpu_free(dy_data); \ - } \ - if (use_x_y_data) { \ - if (len != n) { \ - xpu_free(y_broadcast); \ - } \ - } \ - } \ - } +#include "xpu/refactor/math.h" namespace paddle { namespace operators { -template -struct XPUAddFunctor { - int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { - return xpu::elementwise_add(ctx, x, y, z, len); +static std::pair, std::vector> XPUDimsToBroadcastVector( + const framework::DDim& x, const framework::DDim& y) { + std::vector x_v; + std::vector y_v; + int y_size = y.size(); + for (int i = 0; i < y_size; ++i) { + if (x[i] == y[i]) { + x_v.push_back(y[i]); + y_v.push_back(y[i]); + continue; + } + x_v.push_back(1); + x_v.push_back(x[i]); + y_v.push_back(y[i] / x[i]); + y_v.push_back(x[i]); } -}; + return std::make_pair(x_v, y_v); +} -template -struct XPUMulFunctor { - int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { - return xpu::elementwise_mul(ctx, x, y, z, len); +static std::pair, std::vector> XPUReducesAxisVector( + const framework::DDim& x, const framework::DDim& y) { + std::vector x_vector; + std::vector axis_v; + PADDLE_ENFORCE_GT( + x.size(), 0, platform::errors::OutOfRange("x size is less 1, x shape is ", + x.to_str())); + PADDLE_ENFORCE_GT( + y.size(), 0, platform::errors::OutOfRange("y size is less 1, y shape is ", + y.to_str())); + + int y_nums = framework::product(y); + x_vector = framework::vectorize(x); + if (y_nums == 1) { + for (int i = 0; i < x.size(); ++i) { + axis_v.push_back(i); + } + return std::make_pair(x_vector, axis_v); + } + int yidx = 0; + for (size_t i = 0; i < x_vector.size(); ++i) { + if (y[yidx] == 1) { + axis_v.push_back(i); + yidx++; + continue; + } + if (x_vector[i] != y[yidx]) { + axis_v.push_back(i); + continue; + } + yidx++; } -}; + return std::make_pair(x_vector, axis_v); +} -template -void XPUElementwise(const framework::ExecutionContext& ctx) { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), true, - platform::errors::PreconditionNotMet( - "This kernel only runs on XPU device.")); +template +void XPUElementwise( + const framework::ExecutionContext& ctx, + std::function func) { auto x_var = ctx.InputVar("X"); PADDLE_ENFORCE_NE(x_var, nullptr, platform::errors::InvalidArgument( "Cannot get input Variable X")); @@ -194,74 +95,226 @@ void XPUElementwise(const framework::ExecutionContext& ctx) { auto* y = ctx.Input("Y"); auto* z = ctx.Output("Out"); z->mutable_data(ctx.GetPlace()); - - int axis = ctx.Attr("axis"); auto x_dims = x.dims(); - auto y_dims_untrimed = y->dims(); - PADDLE_ENFORCE_GE(x_dims.size(), y_dims_untrimed.size(), - platform::errors::InvalidArgument( - "Rank of first input must >= rank of second input.")); - axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis); - PADDLE_ENFORCE_EQ( - axis >= 0 && axis < x_dims.size(), true, - platform::errors::InvalidArgument("Axis should be in range [0, x_dims)")); - auto y_dims = trim_trailing_singular_dims(y_dims_untrimed); - axis = (y_dims.size() == 0) ? x_dims.size() : axis; - int pre, n, post, is_common_broadcast; - get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post, &is_common_broadcast); + auto y_dims = y->dims(); + int max_dim = std::max(x_dims.size(), y_dims.size()); + int axis = ctx.Attr("axis"); + axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis); - PADDLE_ENFORCE_NE(is_common_broadcast, 1, - platform::errors::Unimplemented( - "X's shape should be equal to Y's shape.")); + PADDLE_ENFORCE_GE( + axis, 0, + platform::errors::InvalidArgument( + "Axis should be great than or equal to 0, but received axis is %d.", + axis)); + PADDLE_ENFORCE_LT(axis, max_dim, + platform::errors::InvalidArgument( + "Axis should be less than %d, but received axis is %d.", + max_dim, axis)); - int len = pre * n * post; + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + framework::DDim out_dim = framework::make_ddim(out_dims_array); const T* x_data = x.data(); const T* y_data = y->data(); T* z_data = z->data(); - T* y_broadcast = nullptr; + bool need_wait = false; + framework::Tensor x_broadcast_tensor; + framework::Tensor y_broadcast_tensor; + auto& dev_ctx = + ctx.template device_context(); + int ret = xpu::SUCCESS; + // begin broadcast now + if (x.numel() != z->numel()) { + // broadcast x + std::pair, std::vector> bcast_v = + XPUDimsToBroadcastVector(framework::make_ddim(x_dims_array), out_dim); + + ret = xpu::broadcast( + dev_ctx.x_context(), x_data, + x_broadcast_tensor.mutable_data(ctx.GetPlace(), z->numel()), + bcast_v.first, bcast_v.second); + PADDLE_ENFORCE_EQ( + ret, xpu::SUCCESS, + platform::errors::External( + "XPU kernel broadcast occur error in XPUElementwise error code %d", + ret)); + need_wait = true; + x_data = x_broadcast_tensor.data(); + } + if (y->numel() != z->numel()) { + // broadcast y + std::vector bcast_x_v; + std::vector bcast_y_v; + std::pair, std::vector> bcast_v = + XPUDimsToBroadcastVector(framework::make_ddim(y_dims_array), out_dim); + ret = xpu::broadcast( + dev_ctx.x_context(), y_data, + y_broadcast_tensor.mutable_data(ctx.GetPlace(), z->numel()), + bcast_v.first, bcast_v.second); + PADDLE_ENFORCE_EQ( + ret, xpu::SUCCESS, + platform::errors::External( + "XPU kernel broadcast occur error in XPUElementwise error code %d", + ret)); + need_wait = true; + y_data = y_broadcast_tensor.data(); + } + int len = z->numel(); + ret = func(dev_ctx.x_context(), x_data, y_data, z_data, len); + PADDLE_ENFORCE_EQ( + ret, xpu::SUCCESS, + platform::errors::External( + "XPU kernel Elementwise occur error in XPUElementwise error code ", + ret)); + + if (need_wait && dev_ctx.x_context()->xpu_stream) { + dev_ctx.Wait(); + } +} + +template +void XPUElementwiseGrad(const framework::ExecutionContext& ctx, + std::function + func, + bool use_x_y_data) { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dz = ctx.Input(framework::GradVarName("Out")); + auto* z = dz; + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + int axis = ctx.Attr("axis"); + const framework::DDim& x_dims = x->dims(); + const framework::DDim& y_dims = y->dims(); + int max_dim = std::max(x_dims.size(), y_dims.size()); + axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis); + PADDLE_ENFORCE_GE( + axis, 0, + platform::errors::InvalidArgument( + "Axis should be great than or equal to 0, but received axis is %d.", + axis)); + PADDLE_ENFORCE_LT(axis, max_dim, + platform::errors::InvalidArgument( + "Axis should be less than %d, but received axis is %d.", + max_dim, axis)); + + std::vector x_dims_array(max_dim); + std::vector y_dims_array(max_dim); + std::vector out_dims_array(max_dim); + GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(), + y_dims_array.data(), out_dims_array.data(), max_dim, + axis); + framework::DDim out_dim = framework::make_ddim(out_dims_array); + + int len = framework::product(out_dim); + + framework::Tensor x_broadcast_tensor; + framework::Tensor y_broadcast_tensor; + + framework::Tensor dx_local_tensor; + framework::Tensor dy_local_tensor; + + bool need_wait = false; + const T* x_data = use_x_y_data ? x->data() : z->data(); + const T* y_data = use_x_y_data ? y->data() : z->data(); + + const T* z_data = z->data(); + const T* dz_data = (const T*)dz->data(); + + bool dx_need_reduce = (dx != nullptr) && (dx->numel() != len); + bool dy_need_reduce = (dy != nullptr) && (dy->numel() != len); + + T* dx_data = ((dx == nullptr) || dx_need_reduce) + ? (dx_local_tensor.mutable_data(ctx.GetPlace(), len)) + : (dx->mutable_data(ctx.GetPlace())); + + T* dy_data = ((dy == nullptr) || dy_need_reduce) + ? (dy_local_tensor.mutable_data(ctx.GetPlace(), len)) + : (dy->mutable_data(ctx.GetPlace())); + + int ret = xpu::SUCCESS; auto& dev_ctx = ctx.template device_context(); - if (post == 1) { - if (std::is_same>::value) { - int res = xpu::matrix_vector_add(dev_ctx.x_context(), x_data, y_data, - z_data, pre, n); - PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, - platform::errors::External("XPU kernel error occur! %s", - get_xpu_error_message(res))); - return; - } - if (std::is_same>::value) { - int res = xpu::matrix_vector_mul(dev_ctx.x_context(), x_data, y_data, - z_data, pre, n); - PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, - platform::errors::External("XPU kernel error occur! %s", - get_xpu_error_message(res))); - return; - } + if (use_x_y_data && x->numel() != len) { + std::vector bcast_x_v; + std::vector bcast_y_v; + std::pair, std::vector> bcast_v = + XPUDimsToBroadcastVector(framework::make_ddim(x_dims_array), out_dim); + ret = xpu::broadcast( + dev_ctx.x_context(), x_data, + x_broadcast_tensor.mutable_data(ctx.GetPlace(), len), bcast_v.first, + bcast_v.second); + PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS, + platform::errors::External( + "XPU kernel broadcast error occur! %d", ret)); + need_wait = true; + x_data = x_broadcast_tensor.data(); + } + + if (use_x_y_data && y->numel() != len) { + // broadcast y + std::vector bcast_x_v; + std::vector bcast_y_v; + std::pair, std::vector> bcast_v = + XPUDimsToBroadcastVector(framework::make_ddim(y_dims_array), out_dim); + ret = xpu::broadcast( + dev_ctx.x_context(), y_data, + y_broadcast_tensor.mutable_data(ctx.GetPlace(), len), bcast_v.first, + bcast_v.second); + PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS, + platform::errors::External( + "XPU kernel broadcast error occur! %d", ret)); + need_wait = true; + y_data = y_broadcast_tensor.data(); } - if (pre != 1 || post != 1) { - XPU_MALLOC(&y_broadcast, len * sizeof(T)); - int res = xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre, - n, post, xpu::ElementwiseOp::ASSIGN); - PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, - platform::errors::External("XPU kernel error occur! %s", - get_xpu_error_message(res))); - y_data = y_broadcast; + ret = func(dev_ctx.x_context(), x_data, y_data, z_data, dz_data, dx_data, + dy_data, len); + PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS, platform::errors::External( + "XPU kernel binary occur error in " + "XPUElementwiseGrad, error code %d", + ret)); + + if (dx_need_reduce) { + const framework::DDim& dx_dims = dx->dims(); + std::pair, std::vector> reduce_v = + XPUReducesAxisVector(out_dim, dx_dims); + ret = xpu::reduce_sum(dev_ctx.x_context(), dx_data, + dx->mutable_data(ctx.GetPlace()), reduce_v.first, + reduce_v.second); + PADDLE_ENFORCE_EQ( + ret, xpu::SUCCESS, + platform::errors::External("XPU kernel reduce_sum occur error in " + "XPUElementwiseGrad, error code %d", + ret)); + need_wait = true; } - Functor functor; - int res = functor(dev_ctx.x_context(), x_data, y_data, z_data, len); - PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, - platform::errors::External("XPU kernel error occur! %s", - get_xpu_error_message(res))); + if (dy_need_reduce) { + const framework::DDim& dy_dims = dy->dims(); + std::pair, std::vector> reduce_v = + XPUReducesAxisVector(out_dim, dy_dims); + ret = xpu::reduce_sum(dev_ctx.x_context(), dy_data, + dy->mutable_data(ctx.GetPlace()), reduce_v.first, + reduce_v.second); + PADDLE_ENFORCE_EQ( + ret, xpu::SUCCESS, + platform::errors::External("XPU kernel reduce_sum occur error in " + "XPUElementwiseGrad, error code %d", + ret)); + need_wait = true; + } - if (pre != 1 || post != 1) { + if (need_wait && dev_ctx.x_context()->xpu_stream) { dev_ctx.Wait(); - xpu_free(y_broadcast); } } diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc index f4f6eb9cdc8..368a12057c8 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc @@ -19,6 +19,9 @@ limitations under the License. */ #include #include +#include "xpu/refactor/math.h" +#include "xpu/refactor/nn.h" + namespace paddle { namespace operators { @@ -41,11 +44,13 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel { loss->mutable_data(context.GetPlace()); const int n = SizeToAxis(axis, logits->dims()); const int d = SizeFromAxis(axis, logits->dims()); + std::vector logits_dims = framework::vectorize(logits->dims()); // softmax auto& dev_ctx = context.template device_context(); - int r = xpu::softmax2d_forward(dev_ctx.x_context(), logits->data(), - softmax->data(), n, d); + int r = xpu::softmax(dev_ctx.x_context(), logits->data(), + softmax->data(), logits_dims, axis); + PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, platform::errors::External("XPU kernel error. Softmax2d_forward " @@ -55,44 +60,35 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel { auto ignore_index = context.Attr("ignore_index"); const bool soft_label = context.Attr("soft_label"); if (soft_label) { - PADDLE_THROW(platform::errors::InvalidArgument( - "XPU only support soft_label == false for now!")); + r = xpu::soft_cross_entropy( + dev_ctx.x_context(), softmax->data(), labels->data(), + loss->data(), n, d); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External("XPU kernel error. soft_cross_entropy " + "execution not succeed, error code=%d", + r)); } else { - auto* p_labels = labels->data(); - int64_t* labels_int64_host = - reinterpret_cast(std::malloc(n * sizeof(int64_t))); - int* labels_int32_host = - reinterpret_cast(std::malloc(n * sizeof(int))); - int* labels_int32_device = NULL; - int ret = xpu_malloc(reinterpret_cast(&labels_int32_device), - n * sizeof(int)); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check " - "where Baidu Kunlun Card is properly installed.", - ret)); - dev_ctx.Wait(); - memory::Copy(platform::CPUPlace(), labels_int64_host, - BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), - p_labels, n * sizeof(int64_t)); - for (int i = 0; i < n; ++i) { - labels_int32_host[i] = labels_int64_host[i]; - } - memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), - labels_int32_device, platform::CPUPlace(), labels_int32_host, - n * sizeof(int)); - int r = xpu::cross_entropy_forward( - dev_ctx.x_context(), n, d, softmax->data(), - labels_int32_device, loss->data(), nullptr, ignore_index); + Tensor labels_int32; + labels_int32.mutable_data(context.GetPlace(), labels->numel()); + r = xpu::cast_v2( + dev_ctx.x_context(), labels->data(), + labels_int32.data(), labels->numel()); + PADDLE_ENFORCE_EQ( + r, xpu::Error_t::SUCCESS, + platform::errors::External("XPU kernel error. cast_v2 " + "execution not succeed, error code=%d", + r)); + + r = xpu::hard_cross_entropy( + dev_ctx.x_context(), softmax->data(), + labels_int32.data(), loss->data(), nullptr, n, d, + ignore_index); PADDLE_ENFORCE_EQ( r, xpu::Error_t::SUCCESS, - platform::errors::External("XPU kernel error. Cross_entropy_forward " + platform::errors::External("XPU kernel error. hard_cross_entropy " "execution not succeed, error code=%d", r)); - dev_ctx.Wait(); - std::free(labels_int32_host); - std::free(labels_int64_host); - xpu_free(labels_int32_device); } } }; diff --git a/python/paddle/fluid/tests/unittests/xpu/elementwise.py b/python/paddle/fluid/tests/unittests/xpu/elementwise.py deleted file mode 100644 index f4f2ddb19cf..00000000000 --- a/python/paddle/fluid/tests/unittests/xpu/elementwise.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import paddle -import paddle.fluid as fluid -paddle.enable_static() - - -class TestXPUElementwiseOpBase(object): - def setUp(self, op_type): - self.op_type = op_type - self.attrs = {'use_xpu': True} - self.is_common_broadcast = False - self.is_x_size_less_than_y = False - self.grad_implemented = False - self.y_grad_implemented = True - self.dtype = np.float32 - self.__class__.op_type = self.op_type - self.__class__.use_xpu = True - self.__class__.dtype = self.dtype - - def net(self, place): - with fluid.program_guard(fluid.Program(), fluid.Program()): - x = fluid.layers.data( - name='X', shape=self.inputs['X'].shape, dtype=self.dtype) - y = fluid.layers.data( - name='Y', shape=self.inputs['Y'].shape, dtype=self.dtype) - op = getattr(fluid.layers, self.op_type) - z = op(x, y) - exe = fluid.Executor(place) - z_value = exe.run(feed=self.inputs, fetch_list=[z.name]) - - def test_check_output(self): - if paddle.is_compiled_with_xpu(): - place = paddle.XPUPlace(0) - if not self.is_common_broadcast and not self.is_x_size_less_than_y: - self.check_output_with_place(place, atol=1e-3) - else: - with self.assertRaises(BaseException): - self.net(place) - - def _check_grad_xpu_helper(self, - inputs_to_check, - output_names, - no_grad_set=None, - max_relative_error=0.01): - if self.grad_implemented and not self.is_common_broadcast \ - and not self.is_x_size_less_than_y: - if paddle.is_compiled_with_xpu(): - place = paddle.XPUPlace(0) - self.check_grad_with_place( - place, - inputs_to_check, - output_names, - no_grad_set=no_grad_set, - max_relative_error=max_relative_error) - - def test_check_grad_normal(self): - self._check_grad_xpu_helper(['X', 'Y'], 'Out') - - def test_check_grad_ingore_x(self): - self._check_grad_xpu_helper(['Y'], 'Out', set("X")) - - def test_check_grad_ingore_y(self): - if self.y_grad_implemented: - self._check_grad_xpu_helper(['X'], 'Out', set("Y")) - - def init_axis(self): - self.axis = -1 - - def make_input(self, x_shape=[13, 17], y_shape=[13, 17]): - self.inputs = { - 'X': np.random.uniform(0.1, 1, x_shape).astype(self.dtype), - 'Y': np.random.uniform(0.1, 1, y_shape).astype(self.dtype) - } - - def reshape_input(self, x_shape=None, y_shape=None): - if x_shape is None: - x = self.inputs['X'] - else: - x = self.inputs['X'].reshape(x_shape) - if y_shape is None: - y = self.inputs['Y'] - else: - y = self.inputs['Y'].reshape(y_shape) - return x, y - - def make_output(self, x_shape=None, y_shape=None): - pass diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py index 9c6e7d21c1a..c4905a229b2 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op_xpu.py @@ -13,18 +13,21 @@ # limitations under the License. from __future__ import print_function +import numpy as np import sys sys.path.append("..") -import unittest -import numpy as np import paddle -import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest +import unittest import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard +paddle.enable_static() -class TestElementwiseAddOp(OpTest): +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseAddOp(XPUOpTest): def init_kernel_type(self): self.use_mkldnn = False @@ -34,6 +37,7 @@ class TestElementwiseAddOp(OpTest): self.init_input_output() self.init_kernel_type() self.init_axis() + self.use_xpu = True self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(self.x), @@ -43,80 +47,33 @@ class TestElementwiseAddOp(OpTest): self.outputs = {'Out': self.out} def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.check_output(check_dygraph=(self.use_mkldnn == False)) - - def test_check_grad_normal(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - if self.dtype == np.float16: - return - self.check_grad( - ['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False)) - - def test_check_grad_ingore_x(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - if self.dtype == np.float16: - return - self.check_grad( - ['Y'], - 'Out', - no_grad_set=set("X"), - check_dygraph=(self.use_mkldnn == False)) - - def test_check_grad_ingore_y(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - if self.dtype == np.float16: - return - self.check_grad( - ['X'], - 'Out', - no_grad_set=set('Y'), - check_dygraph=(self.use_mkldnn == False)) - - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) - self.out = np.add(self.x, self.y) - - def init_dtype(self): - self.dtype = np.float64 - - def init_axis(self): - self.axis = -1 - - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestXPUElementwiseAddOp(OpTest): - def setUp(self): - self.op_type = "elementwise_add" - self.init_dtype() - self.init_input_output() - self.init_axis() - - self.inputs = {'X': self.x, 'Y': self.y} - self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True} - self.outputs = {'Out': self.out} - - def test_check_output(self): - if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) self.check_output_with_place(place) def test_check_grad_normal(self): - if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ['X', 'Y'], 'Out') + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.006) def test_check_grad_ingore_x(self): - if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ['Y'], 'Out') + self.check_grad_with_place( + place, ['Y'], + 'Out', + no_grad_set=set("X"), + max_relative_error=0.006) def test_check_grad_ingore_y(self): - if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + if paddle.is_compiled_with_xpu(): place = paddle.XPUPlace(0) - self.check_grad_with_place(place, ['X'], 'Out') + self.check_grad_with_place( + place, ['X'], + 'Out', + no_grad_set=set("Y"), + max_relative_error=0.006) def init_input_output(self): self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) @@ -130,6 +87,8 @@ class TestXPUElementwiseAddOp(OpTest): self.axis = -1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_scalar(TestElementwiseAddOp): @@ -139,6 +98,8 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): self.out = self.x + self.y +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1,1) to test broadcast.") class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): @@ -148,6 +109,8 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): self.out = self.x + self.y +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_Vector(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.random((100, )).astype(self.dtype) @@ -155,6 +118,8 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): self.out = np.add(self.x, self.y) +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) @@ -165,6 +130,8 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): self.axis = 0 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) @@ -175,6 +142,8 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): self.axis = 1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) @@ -182,6 +151,8 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): self.out = self.x + self.y.reshape(1, 1, 100) +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) @@ -192,6 +163,8 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): self.axis = 1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) @@ -202,6 +175,8 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): self.axis = 0 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 3, 12).astype(self.dtype) @@ -209,6 +184,8 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): self.out = self.x + self.y +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) @@ -216,6 +193,8 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): self.out = self.x + self.y +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) @@ -223,6 +202,8 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): self.out = self.x + self.y +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 10, 12).astype(self.dtype) @@ -233,6 +214,8 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): self.axis = 1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") @skip_check_grad_ci( reason="[skip shape check] Use y_shape(1) to test broadcast.") class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): @@ -245,6 +228,8 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): self.axis = 1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) @@ -255,6 +240,8 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): self.axis = -1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) @@ -265,6 +252,8 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): self.axis = -1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) @@ -275,6 +264,8 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): self.axis = -1 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) @@ -285,14 +276,16 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): self.axis = 2 +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestElementwiseAddOpError(unittest.TestCase): def test_errors(self): with program_guard(Program(), Program()): # the input of elementwise_add must be Variable. x1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) y1 = fluid.create_lod_tensor( - np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 @@ -302,6 +295,8 @@ class TestElementwiseAddOpError(unittest.TestCase): self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2) +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") class TestAddOp(unittest.TestCase): def test_name(self): with fluid.program_guard(fluid.Program()): @@ -324,7 +319,7 @@ class TestAddOp(unittest.TestCase): y = fluid.data(name="y", shape=[3], dtype='float32') z = paddle.add(x, y) - place = fluid.CPUPlace() + place = fluid.XPUPlace(0) exe = fluid.Executor(place) z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) z_expected = np.array([3., 8., 6.]) @@ -332,8 +327,8 @@ class TestAddOp(unittest.TestCase): def test_dygraph(self): with fluid.dygraph.guard(): - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') + np_x = np.array([2, 3, 4]).astype('float32') + np_y = np.array([1, 5, 2]).astype('float32') x = fluid.dygraph.to_variable(np_x) y = fluid.dygraph.to_variable(np_y) z = paddle.add(x, y) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py index cb6e412cb0f..0fd35d7a457 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_div_op_xpu.py @@ -17,121 +17,233 @@ import unittest import numpy as np import paddle import paddle.fluid as fluid +import paddle.fluid.core as core from op_test import OpTest, skip_check_grad_ci -from elementwise import TestXPUElementwiseOpBase +from op_test_xpu import XPUOpTest paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseDivOp(OpTest, TestXPUElementwiseOpBase): +class ElementwiseDivOp(XPUOpTest): def setUp(self): - TestXPUElementwiseOpBase.setUp(self, "elementwise_div") - self.make_input() - self.make_output() - - def make_output(self, x_shape=None, y_shape=None): - x, y = self.reshape_input(x_shape, y_shape) - self.outputs = {'Out': np.divide(x, y)} + self.op_type = "elementwise_div" + self.dtype = np.float32 + self.init_dtype() + self.use_xpu = True + """ Warning + CPU gradient check error! + 'X': np.random.random((32,84)).astype("float32"), + 'Y': np.random.random((32,84)).astype("float32") + """ + self.inputs = { + 'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype), + 'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.05) + + def test_check_grad_ingore_x(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + max_relative_error=0.05, + no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + max_relative_error=0.05, + no_grad_set=set('Y')) + + def init_dtype(self): + pass + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseDivOp_scalar(ElementwiseDivOp): + def setUp(self): + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(np.float32), + 'Y': np.random.uniform(0.1, 1, [1]).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_scalar(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_Vector(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_scalar, self).setUp() - self.grad_implemented = False - self.make_input([20, 3, 4], [1]) - self.make_output() + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [100]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_Vector(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_Vector, self).setUp() - self.make_input([100, ], [100, ]) - self.make_output() + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + + self.attrs = {'axis': 0} + self.outputs = { + 'Out': + np.divide(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_0(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_0, self).setUp() - self.attrs['axis'] = 0 - self.make_input([100, 3, 4], [100, ]) - self.make_output(y_shape=[100, 1, 1]) + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 100, 4]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_1(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_1, self).setUp() - self.attrs['axis'] = 1 - self.make_input([2, 100, 4], [100, ]) - self.make_output(y_shape=[1, 100, 1]) + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + + self.outputs = { + 'Out': + np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_2(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_2, self).setUp() - self.make_input([2, 3, 100], [100, ]) - self.make_output(y_shape=[1, 1, 100]) + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [10, 12]).astype("float32") + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 10, 12, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_3(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_3, self).setUp() - self.attrs['axis'] = 1 - self.make_input([2, 10, 12, 5], [10, 12]) - self.make_output(y_shape=[1, 10, 12, 1]) + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [2, 1, 50]).astype("float32") + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_4(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_4, self).setUp() - self.is_common_broadcast = True - self.make_input([2, 3, 50], [2, 1, 50]) - self.make_output() + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype("float32") + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_broadcast_5(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_broadcast_5, self).setUp() - self.is_common_broadcast = True - self.make_input([2, 3, 4, 20], [2, 3, 1, 20]) - self.make_output() + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [1, 1, 100]).astype("float32"), + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_commonuse_1(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_commonuse_1, self).setUp() - self.is_common_broadcast = True - self.make_input([2, 3, 100], [1, 1, 100]) - self.make_output() + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype("float32"), + } + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseDivOp_xsize_lessthan_ysize(TestXPUElementwiseDivOp): +class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp): def setUp(self): - super(TestElementwiseDivOp_xsize_lessthan_ysize, self).setUp() - self.is_x_size_less_than_y = True - self.attrs['axis'] = 2 - self.make_input([10, 12], [2, 3, 10, 12]) - self.make_output(x_shape=[1, 1, 10, 12]) + self.op_type = "elementwise_div" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [10, 12]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype("float32"), + } + + self.attrs = {'axis': 2} + + self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseDivBroadcast(unittest.TestCase): + def test_shape_with_batch_sizes(self): + with fluid.program_guard(fluid.Program()): + x_var = fluid.data( + name='x', dtype='float32', shape=[None, 3, None, None]) + one = 2. + out = one / x_var + exe = fluid.Executor(fluid.XPUPlace(0)) + x = np.random.uniform(0.1, 0.6, (1, 3, 32, 32)).astype("float32") + out_result, = exe.run(feed={'x': x}, fetch_list=[out]) + self.assertEqual((out_result == (2 / x)).all(), True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py new file mode 100644 index 00000000000..cc8ec3cac2c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_floordiv_op_xpu.py @@ -0,0 +1,87 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +sys.path.append("..") +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest +paddle.enable_static() +import random + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseModOp(XPUOpTest): + def init_kernel_type(self): + self.use_mkldnn = False + + def setUp(self): + self.op_type = "elementwise_floordiv" + self.dtype = np.float32 + self.axis = -1 + self.init_dtype() + self.init_input_output() + self.init_kernel_type() + self.init_axis() + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Y': OpTest.np_dtype_to_fluid_dtype(self.y) + } + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} + self.outputs = {'Out': self.out} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def init_input_output(self): + self.x = np.random.uniform(0, 10000, [10, 10]).astype(self.dtype) + self.y = np.random.uniform(0, 1000, [10, 10]).astype(self.dtype) + self.out = np.floor_divide(self.x, self.y) + + def init_dtype(self): + pass + + def init_axis(self): + pass + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseModOp_scalar(TestElementwiseModOp): + def init_input_output(self): + scale_x = random.randint(0, 100000000) + scale_y = random.randint(1, 100000000) + self.x = (np.random.rand(2, 3, 4) * scale_x).astype(self.dtype) + self.y = (np.random.rand(1) * scale_y + 1).astype(self.dtype) + self.out = np.floor_divide(self.x, self.y) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseModOpInverse(TestElementwiseModOp): + def init_input_output(self): + self.x = np.random.uniform(0, 10000, [10]).astype(self.dtype) + self.y = np.random.uniform(0, 1000, [10, 10]).astype(self.dtype) + self.out = np.floor_divide(self.x, self.y) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py index 340c5895c13..dbe575d406a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_max_op_xpu.py @@ -16,113 +16,163 @@ sys.path.append("..") import unittest import numpy as np from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest import paddle -from elementwise import TestXPUElementwiseOpBase paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseOp(OpTest, TestXPUElementwiseOpBase): +class TestElementwiseOp(XPUOpTest): def setUp(self): - TestXPUElementwiseOpBase.setUp(self, "elementwise_max") - self.make_input() - self.make_output() - - def make_input(self, x_shape=[13, 17], y_shape=[13, 17], idx_list=None): - x = np.random.random(x_shape).astype(self.dtype) - sgn = np.random.choice([-1, 1], y_shape).astype(self.dtype) - if idx_list is None: - y = x + sgn * np.random.uniform(0.1, 1, y_shape).astype(self.dtype) - else: - x_temp = x - for idx in idx_list: - x_temp = np.take(x_temp, [0], axis=idx) - sgn = sgn.reshape(x_temp.shape) - y = x_temp + sgn * np.random.uniform(0.1, 1, x_temp.shape) - y = y.reshape(y_shape).astype(self.dtype) - + self.use_xpu = True + self.op_type = "elementwise_max" + # If x and y have the same value, the max() is not differentiable. + # So we generate test data by the following method + # to avoid them being too close to each other. + x = np.random.uniform(0.1, 1, [13, 17]).astype("float32") + sgn = np.random.choice([-1, 1], [13, 17]).astype("float32") + y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float32") self.inputs = {'X': x, 'Y': y} - - def make_output(self, x_shape=None, y_shape=None): - x, y = self.reshape_input(x_shape, y_shape) - self.outputs = {'Out': np.maximum(x, y)} - - + self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + max_relative_error=0.006, + no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + max_relative_error=0.006, + no_grad_set=set('Y')) + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_scalar(TestXPUElementwiseOp): +class TestElementwiseMaxOp_scalar(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_scalar, self).setUp() - self.make_input([2, 3, 20], [1]) - self.make_output() - self.grad_implemented = False + self.op_type = "elementwise_max" + x = np.random.random_integers(-5, 5, [2, 3, 20]).astype("float32") + y = np.array([0.5]).astype("float32") + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_Vector(TestXPUElementwiseOp): +class TestElementwiseMaxOp_Vector(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_Vector, self).setUp() - self.make_input([100, ], [100, ]) - self.make_output() + self.op_type = "elementwise_max" + x = np.random.random((100, )).astype("float32") + sgn = np.random.choice([-1, 1], (100, )).astype("float32") + y = x + sgn * np.random.uniform(0.1, 1, (100, )).astype("float32") + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_0(TestXPUElementwiseOp): +class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_broadcast_0, self).setUp() - self.attrs['axis'] = 0 - self.make_input([100, 5, 2], [100, ], [1, 2]) - self.make_output(y_shape=[100, 1, 1]) + self.op_type = "elementwise_max" + x = np.random.uniform(0.5, 1, (100, 5, 2)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[:, 0, 0] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} - -@unittest.skipIf(not paddle.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_1(TestXPUElementwiseOp): - def setUp(self): - super(TestElementwiseMaxOp_broadcast_1, self).setUp() - self.attrs['axis'] = 1 - self.make_input([2, 100, 3], [100, ], [0, 2]) - self.make_output(y_shape=[1, 100, 1]) + self.attrs = {'axis': 0} + self.outputs = { + 'Out': + np.maximum(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_2(TestXPUElementwiseOp): +class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_broadcast_2, self).setUp() - self.make_input([1, 3, 100], [100, ], [0, 1]) - self.make_output(y_shape=[1, 1, 100]) + self.op_type = "elementwise_max" + x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[0, :, 0] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_3(TestXPUElementwiseOp): +class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_broadcast_3, self).setUp() - self.attrs['axis'] = 1 - self.make_input([2, 50, 2, 1], [50, 2], [0, 3]) - self.make_output(y_shape=[1, 50, 2, 1]) + self.op_type = "elementwise_max" + x = np.random.uniform(0.5, 1, (1, 3, 100)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[0, 0, :] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.outputs = { + 'Out': + np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_4(TestXPUElementwiseOp): +class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_broadcast_4, self).setUp() - self.make_input([2, 3, 4, 5], [2, 3, 1, 5]) - self.make_output() + self.op_type = "elementwise_max" + x = np.random.uniform(0.5, 1, (2, 50, 2, 1)).astype(np.float32) + sgn = np.random.choice([-1, 1], (50, 2)).astype(np.float32) + y = x[0, :, :, 0] + sgn * \ + np.random.uniform(1, 2, (50, 2)).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 50, 2, 1)) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMaxOp_broadcast_5(TestXPUElementwiseOp): +class TestElementwiseMaxOp_broadcast_4(TestElementwiseOp): def setUp(self): - super(TestElementwiseMaxOp_broadcast_5, self).setUp() - self.make_input([2, 3, 100], [1, 1, 100]) - self.make_output() + self.op_type = "elementwise_max" + x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(np.float32) + sgn = np.random.choice([-1, 1], (2, 3, 1, 5)).astype(np.float32) + y = x + sgn * \ + np.random.uniform(1, 2, (2, 3, 1, 5)).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])} if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py new file mode 100644 index 00000000000..ebe2004c3f4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_min_op_xpu.py @@ -0,0 +1,180 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +sys.path.append("..") +import unittest +import numpy as np +from op_test import OpTest, skip_check_grad_ci +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard +import paddle +from op_test_xpu import XPUOpTest +paddle.enable_static() + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseOp(XPUOpTest): + def setUp(self): + self.op_type = "elementwise_min" + # If x and y have the same value, the min() is not differentiable. + # So we generate test data by the following method + # to avoid them being too close to each other. + x = np.random.uniform(0.1, 1, [13, 17]).astype("float32") + sgn = np.random.choice([-1, 1], [13, 17]).astype("float32") + y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float32") + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseMinOp_scalar(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.random_integers(-5, 5, [10, 3, 4]).astype("float32") + y = np.array([0.5]).astype("float32") + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_Vector(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.random((100, )).astype("float32") + sgn = np.random.choice([-1, 1], (100, )).astype("float32") + y = x + sgn * np.random.uniform(0.1, 1, (100, )).astype("float32") + self.inputs = {'X': x, 'Y': y} + self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_broadcast_0(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.uniform(0.5, 1, (100, 3, 2)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[:, 0, 0] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.attrs = {'axis': 0} + self.outputs = { + 'Out': + np.minimum(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_broadcast_1(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[0, :, 0] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_broadcast_2(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(np.float32) + sgn = np.random.choice([-1, 1], (100, )).astype(np.float32) + y = x[0, 0, :] + sgn * \ + np.random.uniform(1, 2, (100, )).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.outputs = { + 'Out': + np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_broadcast_3(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.uniform(0.5, 1, (2, 25, 4, 1)).astype(np.float32) + sgn = np.random.choice([-1, 1], (25, 4)).astype(np.float32) + y = x[0, :, :, 0] + sgn * \ + np.random.uniform(1, 2, (25, 4)).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': + np.minimum(self.inputs['X'], self.inputs['Y'].reshape(1, 25, 4, 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMinOp_broadcast_4(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_min" + x = np.random.uniform(0.5, 1, (2, 10, 2, 5)).astype(np.float32) + sgn = np.random.choice([-1, 1], (2, 10, 1, 5)).astype(np.float32) + y = x + sgn * \ + np.random.uniform(1, 2, (2, 10, 1, 5)).astype(np.float32) + self.inputs = {'X': x, 'Y': y} + + self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py index 3fa9c6d84e2..39fd07cb7a9 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_mul_op_xpu.py @@ -19,58 +19,111 @@ from op_test import OpTest, skip_check_grad_ci import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard import paddle -from elementwise import TestXPUElementwiseOpBase +from op_test_xpu import XPUOpTest paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp(OpTest, TestXPUElementwiseOpBase): +class ElementwiseMulOp(XPUOpTest): def init_kernel_type(self): self.use_mkldnn = False def setUp(self): - TestXPUElementwiseOpBase.setUp(self, "elementwise_mul") + self.use_xpu = True + self.op_type = "elementwise_mul" + self.dtype = np.float32 + self.axis = -1 + self.init_dtype() + self.init_input_output() self.init_kernel_type() self.init_axis() - self.attrs['axis'] = self.axis - self.attrs['use_mkldnn'] = self.use_mkldnn - self.grad_implemented = True - self.make_input() - self.make_output() - def make_output(self, x_shape=None, y_shape=None): - x, y = self.reshape_input(x_shape, y_shape) - self.outputs = {'Out': np.multiply(x, y)} + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Y': OpTest.np_dtype_to_fluid_dtype(self.y) + } + self.outputs = {'Out': self.out} + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], + 'Out', + check_dygraph=(self.use_mkldnn == False)) + + def test_check_grad_ingore_x(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + no_grad_set=set("X"), + check_dygraph=(self.use_mkldnn == False)) + + def test_check_grad_ingore_y(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + no_grad_set=set('Y'), + check_dygraph=(self.use_mkldnn == False)) + + def init_input_output(self): + self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.out = np.multiply(self.x, self.y) + + def init_dtype(self): + pass + + def init_axis(self): + pass + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp_scalar(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_scalar(ElementwiseMulOp): def setUp(self): - super(TestXPUElementwiseMulOp_scalar, self).setUp() - self.make_input((10, 3, 4), (1, )) - self.make_output() - self.grad_implemented = False + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(10, 3, 4).astype(np.float32), + 'Y': np.random.rand(1).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp_Vector(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_Vector(ElementwiseMulOp): def setUp(self): - super(TestXPUElementwiseMulOp_Vector, self).setUp() - self.make_input((100, ), (100, )) - self.make_output() + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.random((100, )).astype("float32"), + 'Y': np.random.random((100, )).astype("float32") + } + self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp_broadcast_0(TestXPUElementwiseMulOp): - def setUp(self): - super(TestXPUElementwiseMulOp_broadcast_0, self).setUp() - self.make_input((100, 2, 3), (100, )) - self.make_output(y_shape=(100, 1, 1)) - self.y_grad_implemented = False +class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): + def init_input_output(self): + self.x = np.random.rand(100, 2, 3).astype(self.dtype) + self.y = np.random.rand(100).astype(self.dtype) + self.out = self.x * self.y.reshape(100, 1, 1) def init_axis(self): self.axis = 0 @@ -78,75 +131,140 @@ class TestXPUElementwiseMulOp_broadcast_0(TestXPUElementwiseMulOp): @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMulOp_broadcast_1(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): + def setUp(self): + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(2, 100, 3).astype(np.float32), + 'Y': np.random.rand(100).astype(np.float32) + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 100, 1) + } + self.init_kernel_type() + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): def setUp(self): - super(TestElementwiseMulOp_broadcast_1, self).setUp() - self.attrs['axis'] = 1 - self.y_grad_implemented = False - self.make_input((2, 100, 3), (100, )) - self.make_output(y_shape=(1, 100, 1)) + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(2, 3, 100).astype(np.float32), + 'Y': np.random.rand(100).astype(np.float32) + } + + self.outputs = { + 'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 1, 100) + } + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMulOp_broadcast_2(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): def setUp(self): - super(TestElementwiseMulOp_broadcast_2, self).setUp() - self.y_grad_implemented = False - self.make_input((2, 3, 100), (100, )) - self.make_output(y_shape=(1, 1, 100)) + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(2, 10, 12, 3).astype(np.float32), + 'Y': np.random.rand(10, 12).astype(np.float32) + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 10, 12, 1) + } + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMulOp_broadcast_3(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): def setUp(self): - super(TestElementwiseMulOp_broadcast_3, self).setUp() - self.attrs['axis'] = 1 - self.y_grad_implemented = False - self.make_input((2, 10, 12, 3), (10, 12)) - self.make_output(y_shape=(1, 10, 12, 1)) + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(10, 2, 11).astype(np.float32), + 'Y': np.random.rand(10, 1, 11).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMulOp_broadcast_4(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): def setUp(self): - super(TestElementwiseMulOp_broadcast_4, self).setUp() - self.is_common_broadcast = True - self.make_input((10, 2, 11), (10, 1, 11)) - self.make_output() + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(10, 4, 2, 3).astype(np.float32), + 'Y': np.random.rand(10, 4, 1, 3).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseMulOp_broadcast_5(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): def setUp(self): - super(TestElementwiseMulOp_broadcast_5, self).setUp() - self.is_common_broadcast = True - self.make_input((10, 4, 2, 3), (10, 4, 1, 3)) - self.make_output() + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(2, 3, 100).astype(np.float32), + 'Y': np.random.rand(1, 1, 100).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp_commonuse_1(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): def setUp(self): - super(TestXPUElementwiseMulOp_commonuse_1, self).setUp() - self.is_common_broadcast = True - self.make_input((2, 3, 100), (1, 1, 100)) - self.make_output() + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(30, 3, 1, 5).astype(np.float32), + 'Y': np.random.rand(30, 1, 4, 1).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']} + self.init_kernel_type() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseMulOp_xsize_lessthan_ysize(TestXPUElementwiseMulOp): +class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): def setUp(self): - super(TestXPUElementwiseMulOp_xsize_lessthan_ysize, self).setUp() - self.attrs['axis'] = 2 - self.is_x_size_less_than_y = True - self.make_input((10, 10), (2, 2, 10, 10)) - self.make_output(x_shape=(1, 1, 10, 10)) + self.op_type = "elementwise_mul" + self.inputs = { + 'X': np.random.rand(10, 10).astype(np.float32), + 'Y': np.random.rand(2, 2, 10, 10).astype(np.float32) + } + + self.attrs = {'axis': 2} + + self.outputs = { + 'Out': self.inputs['X'].reshape(1, 1, 10, 10) * self.inputs['Y'] + } + self.init_kernel_type() + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwiseMulOpError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + # the input of elementwise_mul must be Variable. + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) + y1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0)) + self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1) + + # the input dtype of elementwise_mul must be float32 + x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8") + y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8") + self.assertRaises(TypeError, fluid.layers.elementwise_mul, x2, y2) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py new file mode 100644 index 00000000000..cbad3761196 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_pow_op_xpu.py @@ -0,0 +1,182 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +sys.path.append("..") +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest +paddle.enable_static() + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp(XPUOpTest): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(1, 2, [20, 5]).astype("float32"), + 'Y': np.random.uniform(1, 2, [20, 5]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_big_shape_1(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(1, 2, [10, 10]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [10, 10]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_big_shape_2(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(1, 2, [10, 10]).astype("float32"), + 'Y': np.random.uniform(0.2, 2, [10, 10]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwisePowOp_scalar(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [3, 3, 4]).astype(np.float32), + 'Y': np.random.uniform(0.1, 1, [1]).astype(np.float32) + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_tensor(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [100]).astype("float32"), + 'Y': np.random.uniform(1, 3, [100]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_broadcast_0(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 1, 100]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_broadcast_1(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 100, 1]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + self.attrs = {'axis': 1} + self.outputs = { + 'Out': np.power(self.inputs['X'], self.inputs['Y'].reshape(100, 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_broadcast_2(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [100, 3, 1]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [100]).astype("float32") + } + self.attrs = {'axis': 0} + self.outputs = { + 'Out': + np.power(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_broadcast_3(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 20, 5, 1]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [20, 5]).astype("float32") + } + self.attrs = {'axis': 1} + self.outputs = { + 'Out': np.power(self.inputs['X'], self.inputs['Y'].reshape(1, 20, 5, + 1)) + } + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOp_broadcast_4(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 10, 3, 5]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [2, 10, 1, 5]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestElementwisePowOpInt(OpTest): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = {'X': np.asarray([1, 3, 6]), 'Y': np.asarray([1, 1, 1])} + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + self.check_output() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py index 22aa07be951..3bc9fa067a6 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_sub_op_xpu.py @@ -11,117 +11,198 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import unittest + import numpy as np import sys sys.path.append("..") -from op_test import OpTest, skip_check_grad_ci import paddle -from elementwise import TestXPUElementwiseOpBase +from op_test import OpTest, skip_check_grad_ci +from op_test_xpu import XPUOpTest +import unittest paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestXPUElementwiseSubOp(OpTest, TestXPUElementwiseOpBase): +class TestElementwiseOp(OpTest): def setUp(self): - TestXPUElementwiseOpBase.setUp(self, "elementwise_sub") - self.make_input() - self.make_output() - self.grad_implemented = True + self.use_xpu = True + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float32") + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place, atol=1e-3) + + def test_check_grad_normal(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + max_relative_error=0.005, + no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + max_relative_error=0.005, + no_grad_set=set('Y')) - def make_output(self, x_shape=None, y_shape=None): - x, y = self.reshape_input(x_shape, y_shape) - self.outputs = {'Out': x - y} + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseSubOp_scalar(TestElementwiseOp): + def setUp(self): + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(10, 3, 4).astype(np.float32), + 'Y': np.random.rand(1).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_scalar(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_Vector(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_scalar, self).setUp() - self.grad_implemented = False - self.make_input((10, 3, 4), (1, )) - self.make_output() + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.random((100, )).astype("float32"), + 'Y': np.random.random((100, )).astype("float32") + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_Vector(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_broadcast_0(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_Vector, self).setUp() - self.make_input((100, ), (100, )) - self.make_output() + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(100, 3, 2).astype(np.float32), + 'Y': np.random.rand(100).astype(np.float32) + } + + self.attrs = {'axis': 0} + self.outputs = { + 'Out': self.inputs['X'] - self.inputs['Y'].reshape(100, 1, 1) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_broadcast_0(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_broadcast_1(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_broadcast_0, self).setUp() - self.attrs['axis'] = 0 - self.make_input((100, 3, 2), (100, )) - self.make_output(y_shape=(100, 1, 1)) + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(2, 100, 3).astype(np.float32), + 'Y': np.random.rand(100).astype(np.float32) + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 100, 1) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_broadcast_1(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_broadcast_2(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_broadcast_1, self).setUp() - self.attrs['axis'] = 1 - self.make_input((2, 100, 3), (100, )) - self.make_output(y_shape=(1, 100, 1)) + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(2, 3, 100).astype(np.float32), + 'Y': np.random.rand(100).astype(np.float32) + } + + self.outputs = { + 'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 1, 100) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_broadcast_2(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_broadcast_3(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_broadcast_2, self).setUp() - self.make_input((2, 3, 100), (100, )) - self.make_output(y_shape=(1, 1, 100)) + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(2, 10, 12, 3).astype(np.float32), + 'Y': np.random.rand(10, 12).astype(np.float32) + } + + self.attrs = {'axis': 1} + self.outputs = { + 'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 10, 12, 1) + } @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_broadcast_3(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_broadcast_4(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_broadcast_3, self).setUp() - self.attrs['axis'] = 1 - self.make_input((2, 10, 12, 3), (10, 12)) - self.make_output(y_shape=(1, 10, 12, 1)) + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(2, 5, 3, 12).astype(np.float32), + 'Y': np.random.rand(2, 5, 1, 12).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_broadcast_4(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_commonuse_1(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_broadcast_4, self).setUp() - self.is_common_broadcast = True - self.make_input((2, 5, 3, 12), (2, 5, 1, 12)) - self.make_output() + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(2, 3, 100).astype(np.float32), + 'Y': np.random.rand(1, 1, 100).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_commonuse_1(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_commonuse_2(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_commonuse_1, self).setUp() - self.is_common_broadcast = True - self.make_input((2, 3, 100), (1, 1, 100)) - self.make_output() + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(10, 3, 1, 4).astype(np.float32), + 'Y': np.random.rand(10, 1, 12, 1).astype(np.float32) + } + self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']} @unittest.skipIf(not paddle.is_compiled_with_xpu(), "core is not compiled with XPU") -class TestElementwiseSubOp_xsize_lessthan_ysize(TestXPUElementwiseSubOp): +class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp): def setUp(self): - super(TestElementwiseSubOp_xsize_lessthan_ysize, self).setUp() - self.attrs['axis'] = 2 - self.is_x_size_less_than_y = True - self.make_input((10, 12), (2, 3, 10, 12)) - self.make_output(x_shape=(1, 1, 10, 12)) + self.op_type = "elementwise_sub" + self.inputs = { + 'X': np.random.rand(10, 12).astype(np.float32), + 'Y': np.random.rand(2, 3, 10, 12).astype(np.float32) + } + + self.attrs = {'axis': 2} + + self.outputs = { + 'Out': self.inputs['X'].reshape(1, 1, 10, 12) - self.inputs['Y'] + } if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py index 80e83e030fe..5a8985315ea 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_softmax_with_cross_entropy_op_xpu.py @@ -13,16 +13,15 @@ # limitations under the License. from __future__ import print_function +from test_softmax_op import stable_softmax +from op_test import OpTest +import paddle.fluid.core as core +import paddle import unittest import numpy as np import sys sys.path.append("..") -import paddle -import paddle.fluid.core as core - -from op_test import OpTest -from test_softmax_op import stable_softmax def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1): @@ -54,10 +53,11 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): self.op_type = "softmax_with_cross_entropy" self.numeric_stable_mode = False self.soft_label = False - self.dtype = np.float64 + self.dtype = np.float32 self.axis = -1 self.ignore_index = -1 self.shape = [41, 37] + self.use_xpu = True def setUp(self): self.initParams() @@ -103,7 +103,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): paddle.enable_static() place = paddle.XPUPlace(0) self.check_grad_with_place( - place, ["Logits"], "Loss", max_relative_error=0.1) + place, ["Logits"], "Loss", max_relative_error=0.2) class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): @@ -115,6 +115,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): self.axis = -1 self.ignore_index = -1 self.dtype = np.float32 + self.use_xpu = True def test_check_output(self): if paddle.is_compiled_with_xpu(): @@ -127,7 +128,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): paddle.enable_static() place = paddle.XPUPlace(0) self.check_grad_with_place( - place, ["Logits"], "Loss", max_relative_error=0.1) + place, ["Logits"], "Loss", max_relative_error=0.2) class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): @@ -139,10 +140,11 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): self.op_type = "softmax_with_cross_entropy" self.numeric_stable_mode = True self.soft_label = True - self.dtype = np.float64 + self.dtype = np.float32 self.axis = -1 self.ignore_index = -1 self.shape = [41, 37] + self.use_xpu = True def test_check_output(self): if paddle.is_compiled_with_xpu(): @@ -155,7 +157,7 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): paddle.enable_static() place = paddle.XPUPlace(0) self.check_grad_with_place( - place, ["Logits"], "Loss", max_relative_error=0.1) + place, ["Logits"], "Loss", max_relative_error=0.2) class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp): @@ -170,55 +172,56 @@ class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp): self.shape = [41, 37] self.ignore_index = 5 self.axis = -1 - self.dtype = np.float64 - - -class TestXPUSoftmaxWithCrossEntropyOpAxis1(TestXPUSoftmaxWithCrossEntropyOp): - """ - Test softmax with cross entropy operator with discreate one-hot labels. - Given axis != -1 - """ - - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.dtype = np.float64 - self.axis = 0 - self.ignore_index = -1 - self.shape = [3, 5, 7, 11] - - -class TestXPUSoftmaxWithCrossEntropyOpAxis2(TestXPUSoftmaxWithCrossEntropyOp): - """ - Test softmax with cross entropy operator with discreate one-hot labels. - Given axis != -1 - """ - - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.dtype = np.float64 - self.axis = 1 - self.ignore_index = -1 - self.shape = [3, 5, 7, 11] - + self.dtype = np.float32 -class TestXPUSoftmaxWithCrossEntropyOpAxis3(TestXPUSoftmaxWithCrossEntropyOp): - """ - Test softmax with cross entropy operator with discreate one-hot labels. - Given axis != -1 - """ - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.dtype = np.float64 - self.axis = 2 - self.ignore_index = -1 - self.shape = [3, 5, 7, 11] +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpAxis1(TestXPUSoftmaxWithCrossEntropyOp): +# """ +# Test softmax with cross entropy operator with discreate one-hot labels. +# Given axis != -1 +# """ + +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.dtype = np.float32 +# self.axis = 0 +# self.ignore_index = -1 +# self.shape = [3, 5, 7, 11] + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpAxis2(TestXPUSoftmaxWithCrossEntropyOp): +# """ +# Test softmax with cross entropy operator with discreate one-hot labels. +# Given axis != -1 +# """ + +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.dtype = np.float32 +# self.axis = 1 +# self.ignore_index = -1 +# self.shape = [3, 5, 7, 11] + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpAxis3(TestXPUSoftmaxWithCrossEntropyOp): +# """ +# Test softmax with cross entropy operator with discreate one-hot labels. +# Given axis != -1 +# """ + +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.dtype = np.float32 +# self.axis = 2 +# self.ignore_index = -1 +# self.shape = [3, 5, 7, 11] class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp): @@ -231,7 +234,7 @@ class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp): self.op_type = "softmax_with_cross_entropy" self.numeric_stable_mode = True self.soft_label = False - self.dtype = np.float64 + self.dtype = np.float32 self.axis = 3 self.ignore_index = -1 self.shape = [3, 5, 7, 11] @@ -248,46 +251,47 @@ class TestXPUSoftmaxWithCrossEntropyOpAxisDimEqualOne( self.op_type = "softmax_with_cross_entropy" self.numeric_stable_mode = True self.soft_label = False - self.dtype = np.float64 + self.dtype = np.float32 self.axis = -1 self.ignore_index = -1 self.shape = [3, 5, 7, 1] -class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis1( - TestXPUSoftmaxWithCrossEntropyOp): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = True - self.shape = [3, 5, 7, 11] - self.axis = 0 - self.ignore_index = -1 - self.dtype = np.float64 - - -class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis2( - TestXPUSoftmaxWithCrossEntropyOp2): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = True - self.shape = [3, 5, 7, 11] - self.axis = 1 - self.ignore_index = -1 - self.dtype = np.float64 - - -class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis3( - TestXPUSoftmaxWithCrossEntropyOp2): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = True - self.shape = [3, 5, 7, 11] - self.axis = 2 - self.ignore_index = -1 - self.dtype = np.float64 +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis1( +# TestXPUSoftmaxWithCrossEntropyOp): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = True +# self.shape = [3, 5, 7, 11] +# self.axis = 0 +# self.ignore_index = -1 +# self.dtype = np.float32 + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis2( +# TestXPUSoftmaxWithCrossEntropyOp2): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = True +# self.shape = [3, 5, 7, 11] +# self.axis = 1 +# self.ignore_index = -1 +# self.dtype = np.float32 + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis3( +# TestXPUSoftmaxWithCrossEntropyOp2): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = True +# self.shape = [3, 5, 7, 11] +# self.axis = 2 +# self.ignore_index = -1 +# self.dtype = np.float32 class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4( @@ -299,43 +303,44 @@ class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4( self.shape = [3, 5, 7, 11] self.axis = 3 self.ignore_index = -1 - self.dtype = np.float64 - - -class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1( - TestXPUSoftmaxWithCrossEntropyOp3): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.shape = [3, 5, 7, 11] - self.ignore_index = 1 - self.axis = 0 - self.dtype = np.float64 - - -class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2( - TestXPUSoftmaxWithCrossEntropyOp3): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.shape = [3, 5, 7, 11] - self.ignore_index = 0 - self.axis = 1 - self.dtype = np.float64 + self.dtype = np.float32 -class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3( - TestXPUSoftmaxWithCrossEntropyOp3): - def initParams(self): - self.op_type = "softmax_with_cross_entropy" - self.numeric_stable_mode = True - self.soft_label = False - self.shape = [3, 5, 7, 11] - self.ignore_index = 3 - self.axis = 2 - self.dtype = np.float64 +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1( +# TestXPUSoftmaxWithCrossEntropyOp3): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.shape = [3, 5, 7, 11] +# self.ignore_index = 1 +# self.axis = 0 +# self.dtype = np.float32 + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2( +# TestXPUSoftmaxWithCrossEntropyOp3): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.shape = [3, 5, 7, 11] +# self.ignore_index = 0 +# self.axis = 1 +# self.dtype = np.float32 + +# xpu only support axis = rank -1 +# class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3( +# TestXPUSoftmaxWithCrossEntropyOp3): +# def initParams(self): +# self.op_type = "softmax_with_cross_entropy" +# self.numeric_stable_mode = True +# self.soft_label = False +# self.shape = [3, 5, 7, 11] +# self.ignore_index = 3 +# self.axis = 2 +# self.dtype = np.float32 class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4( @@ -347,7 +352,7 @@ class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4( self.shape = [3, 5, 7, 11] self.ignore_index = 3 self.axis = 3 - self.dtype = np.float64 + self.dtype = np.float32 class TestXPUSoftmaxWithCrossEntropyOpBoundary0( @@ -364,7 +369,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary0( self.shape = [3, 5, 7, 11] self.axis = -1 self.ignore_index = -1 - self.dtype = np.float64 + self.dtype = np.float32 self.logits = np.full(self.shape, -500.0).astype(self.dtype) @@ -382,7 +387,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary1( self.shape = [3, 5, 7, 11] self.axis = -1 self.ignore_index = -1 - self.dtype = np.float64 + self.dtype = np.float32 self.logits = np.full(self.shape, 1000.0).astype(self.dtype) self.logits[:, :, 0, :] = -1000.0 -- GitLab