未验证 提交 a5aa4dc7 编写于 作者: T taixiurong 提交者: GitHub

add xpu elementwise ops (#29031)

上级 e9acd9c9
...@@ -27,7 +27,7 @@ template <typename DeviceContext, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceContext, typename T>
class ElementwiseAddXPUKernel : public framework::OpKernel<T> { class ElementwiseAddXPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
XPUElementwise<T, XPUAddFunctor<T>>(ctx); XPUElementwise<T>(ctx, xpu::add<T>);
} }
}; };
...@@ -36,161 +36,7 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> { ...@@ -36,161 +36,7 @@ class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
ElemwiseGradKernel<T>::Compute(ctx); ElemwiseGradKernel<T>::Compute(ctx);
using Tensor = framework::Tensor; XPUElementwiseGrad<T>(ctx, xpu::add_grad<T>, false);
auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
auto dx_dims = dout->dims();
auto dy_dims_untrimed = dout->dims();
T *dx_data = NULL;
T *dy_data = NULL;
int axis = ctx.Attr<int>("axis");
PADDLE_ENFORCE_GE(dx_dims.size(), dy_dims_untrimed.size(),
platform::errors::InvalidArgument(
"Rank of first input must >= rank of second input."));
if (dx != nullptr) {
dx->mutable_data<T>(ctx.GetPlace());
dx_dims = dx->dims();
dx_data = dx->data<T>();
}
if (dy != nullptr) {
dy->mutable_data<T>(ctx.GetPlace());
dy_dims_untrimed = dy->dims();
dy_data = dy->data<T>();
}
int pre, n, post, is_common_broadcast;
if (dx_dims == dy_dims_untrimed) {
pre = post = 1;
n = dout->numel();
} else {
axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis);
PADDLE_ENFORCE_EQ(axis >= 0 && axis < dx_dims.size(), true,
platform::errors::InvalidArgument(
"Axis should be in range [0, dx_dims)"));
auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed);
axis = (dy_dims.size() == 0) ? dx_dims.size() : axis;
get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post,
&is_common_broadcast);
}
int len = pre * n * post;
auto &dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) {
int r = xpu::matrix_vector_add_grad(
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
return;
}
if (dx == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
}
if (dy == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
XPU_SUCCESS,
platform::errors::ResourceExhausted("XPU has no enough memory"));
} else {
if (len != n) {
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
len * sizeof(float)),
XPU_SUCCESS, platform::errors::ResourceExhausted(
"XPU has no enough memory"));
}
}
int r = xpu::elementwise_add_grad(
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error message: "
"RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
if ((dy != nullptr) && (len != n)) {
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
post, xpu::ElementwiseOp::ASSIGN);
if (r == xpu::Error_t::INVALID_PARAM) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::InvalidArgument(
"XPU kernel error of ElementWiseAddOp, error "
"message: INVALID_PARAM, "
"please check your input & output."));
} else if (r == xpu::Error_t::RUNTIME_ERROR) {
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Unavailable(
"XPU kernel error of ElementWiseAddOp, error "
"message: RUNTIME_ERROR, "
"please check whether Baidu Kunlun card is "
"properly installed."));
} else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::ResourceExhausted(
"XPU kernel error of ElementWiseAddOp, error message: "
"NO_ENOUGH_WORKSPACE, XPU has no enough memory."));
}
dev_ctx.Wait();
xpu_free(dy_data);
}
if ((dx == nullptr || dy == nullptr) && !(dy != nullptr && len != n)) {
dev_ctx.Wait();
}
if (dx == nullptr) {
xpu_free(dx_data);
}
if (dy == nullptr) {
xpu_free(dy_data);
}
} }
}; };
......
...@@ -19,18 +19,19 @@ limitations under the License. */ ...@@ -19,18 +19,19 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename DeviceContext, typename T>
struct XPUDivFunctor { class ElementwiseDivXPUKernel : public framework::OpKernel<T> {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { public:
return xpu::elementwise_div(ctx, x, y, z, len); void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T>(ctx, xpu::div<T>);
} }
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class ElementwiseDivXPUKernel : public framework::OpKernel<T> { class ElementwiseDivGradXPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T, XPUDivFunctor<T>>(ctx); XPUElementwiseGrad<T>(ctx, xpu::div_grad<T>, true);
} }
}; };
...@@ -40,4 +41,7 @@ namespace ops = paddle::operators; ...@@ -40,4 +41,7 @@ namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL( REGISTER_OP_XPU_KERNEL(
elementwise_div, elementwise_div,
ops::ElementwiseDivXPUKernel<paddle::platform::XPUDeviceContext, float>); ops::ElementwiseDivXPUKernel<paddle::platform::XPUDeviceContext, float>);
// Register ElementwiseDivGradXPUKernel, instantiated for float on
// XPUDeviceContext, under the op name elementwise_div_grad.
REGISTER_OP_XPU_KERNEL(elementwise_div_grad,
ops::ElementwiseDivGradXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif #endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
namespace paddle {
namespace operators {
// Forward XPU kernel for the elementwise_floordiv operator.
//
// The kernel holds no logic of its own: it hands the execution context and
// the xpu::floordiv<T> primitive to XPUElementwise<T>, which drives the call.
// The DeviceContext template parameter is not referenced in the body.
template <typename DeviceContext, typename T>
class ElementwiseFloordivXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Name the primitive first so the dispatch call reads as (context, op).
    auto floordiv_fn = xpu::floordiv<T>;
    XPUElementwise<T>(ctx, floordiv_fn);
  }
};
} // namespace operators
} // namespace paddle
// Short alias so the registration below can name the kernel class concisely.
namespace ops = paddle::operators;
// Register ElementwiseFloordivXPUKernel, instantiated for float on
// XPUDeviceContext, under the op name elementwise_floordiv. Only the float
// instantiation is registered here.
REGISTER_OP_XPU_KERNEL(elementwise_floordiv,
ops::ElementwiseFloordivXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif
...@@ -20,18 +20,19 @@ limitations under the License. */ ...@@ -20,18 +20,19 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename DeviceContext, typename T>
struct XPUMaxFunctor { class ElementwiseMaxXPUKernel : public framework::OpKernel<T> {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { public:
return xpu::elementwise_max(ctx, x, y, z, len); void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T>(ctx, xpu::max<T>);
} }
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class ElementwiseMaxXPUKernel : public framework::OpKernel<T> { class ElementwiseMaxGradXPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T, XPUMaxFunctor<T>>(ctx); XPUElementwiseGrad<T>(ctx, xpu::max_grad<T>, true);
} }
}; };
...@@ -42,4 +43,7 @@ namespace ops = paddle::operators; ...@@ -42,4 +43,7 @@ namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL( REGISTER_OP_XPU_KERNEL(
elementwise_max, elementwise_max,
ops::ElementwiseMaxXPUKernel<paddle::platform::XPUDeviceContext, float>); ops::ElementwiseMaxXPUKernel<paddle::platform::XPUDeviceContext, float>);
// Register ElementwiseMaxGradXPUKernel, instantiated for float on
// XPUDeviceContext, under the op name elementwise_max_grad.
REGISTER_OP_XPU_KERNEL(elementwise_max_grad,
ops::ElementwiseMaxGradXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif #endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
namespace paddle {
namespace operators {
// Forward XPU kernel for the elementwise_min operator.
//
// Acts purely as an adapter: the xpu::min<T> primitive is passed to
// XPUElementwise<T> together with the execution context.
// The DeviceContext template parameter is not referenced in the body.
template <typename DeviceContext, typename T>
class ElementwiseMinXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto min_fn = xpu::min<T>;
    XPUElementwise<T>(ctx, min_fn);
  }
};
// Backward XPU kernel for the elementwise_min operator.
//
// Forwards the xpu::min_grad<T> primitive to XPUElementwiseGrad<T>. The final
// argument is that helper's use_x_y_data flag; passing true asks it to use
// the forward inputs X and Y as the x/y operands of the primitive.
//
// NOTE(review): this class derives from framework::OpKernel<T>, whereas the
// Add/Sub grad kernels derive from ElemwiseGradKernel<T> and invoke its
// Compute first — confirm the difference is intentional.
template <typename DeviceContext, typename T>
class ElementwiseMinGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    constexpr bool kUseXYData = true;
    auto min_grad_fn = xpu::min_grad<T>;
    XPUElementwiseGrad<T>(ctx, min_grad_fn, kUseXYData);
  }
};
} // namespace operators
} // namespace paddle
// Short alias so the registrations below can name kernel classes concisely.
namespace ops = paddle::operators;
// Register the forward kernel for op elementwise_min (float only).
REGISTER_OP_XPU_KERNEL(
elementwise_min,
ops::ElementwiseMinXPUKernel<paddle::platform::XPUDeviceContext, float>);
// Register the backward kernel for op elementwise_min_grad (float only).
REGISTER_OP_XPU_KERNEL(elementwise_min_grad,
ops::ElementwiseMinGradXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif
...@@ -22,10 +22,18 @@ template <typename DeviceContext, typename T> ...@@ -22,10 +22,18 @@ template <typename DeviceContext, typename T>
class ElementwiseMulXPUKernel : public framework::OpKernel<T> { class ElementwiseMulXPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T, XPUMulFunctor<T>>(ctx); XPUElementwise<T>(ctx, xpu::mul<T>);
} }
}; };
DEFINE_XPU_GRAD_KERNEL(Mul, mul, true); // DEFINE_XPU_GRAD_KERNEL(Mul, mul, true);
// Backward XPU kernel for the elementwise_mul operator.
//
// Forwards the xpu::mul_grad<T> primitive to XPUElementwiseGrad<T>. The final
// argument is that helper's use_x_y_data flag; passing true asks it to use
// the forward inputs X and Y as the x/y operands of the primitive.
template <typename DeviceContext, typename T>
class ElementwiseMulGradXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    constexpr bool kUseXYData = true;
    auto mul_grad_fn = xpu::mul_grad<T>;
    XPUElementwiseGrad<T>(ctx, mul_grad_fn, kUseXYData);
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
#include "xpu/refactor/math.h"
namespace paddle {
namespace operators {
// Forward XPU kernel for the elementwise_pow operator.
//
// Delegates to XPUElementwise<T>, supplying the xpu::pow primitive as the
// binary function to apply.
// The DeviceContext template parameter is not referenced in the body.
template <typename DeviceContext, typename T>
class ElementwisePowXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Instantiate the primitive with the kernel's own element type instead of
    // a hard-coded float: XPUElementwise<T> expects a callable taking
    // const T* / T* buffers, so xpu::pow<float> only matches when T is float.
    // For the registered float kernel this is the same function as before.
    XPUElementwise<T>(ctx, xpu::pow<T>);
  }
};
} // namespace operators
} // namespace paddle
// Short alias so the registration below can name the kernel class concisely.
namespace ops = paddle::operators;
// Register ElementwisePowXPUKernel, instantiated for float on
// XPUDeviceContext, under the op name elementwise_pow. Only the float
// instantiation is registered here.
REGISTER_OP_XPU_KERNEL(
elementwise_pow,
ops::ElementwisePowXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
...@@ -16,25 +16,28 @@ limitations under the License. */ ...@@ -16,25 +16,28 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_sub_op.h" #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h" #include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
#include "xpu/refactor/math.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename DeviceContext, typename T>
struct XPUSubFunctor { class ElementwiseSubXPUKernel : public framework::OpKernel<T> {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { public:
return xpu::elementwise_sub(ctx, x, y, z, len); void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T>(ctx, xpu::sub<float>);
} }
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class ElementwiseSubXPUKernel : public framework::OpKernel<T> { class ElementwiseSubGradXPUKernel : public ElemwiseGradKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
XPUElementwise<T, XPUSubFunctor<T>>(ctx); ElemwiseGradKernel<T>::Compute(ctx);
XPUElementwiseGrad<T>(ctx, xpu::sub_grad<float>, false);
} }
}; };
DEFINE_XPU_GRAD_KERNEL(Sub, sub, false);
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -13,175 +13,76 @@ See the License for the specific language governing permissions and ...@@ -13,175 +13,76 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
#include <algorithm>
#include <string> #include <string>
#include <unordered_map> #include <tuple>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "xpu/refactor/math.h"
inline std::string get_xpu_error_message(int error_type) {
static std::unordered_map<int, std::string> xpu_error_map = {
{baidu::xpu::api::INVALID_PARAM, "Parameter is invalid."},
{baidu::xpu::api::RUNTIME_ERROR,
"Please check whether Baidu Kunlun Card "
"is properly installed."},
{baidu::xpu::api::NO_ENOUGH_WORKSPACE,
"There is not enough memory in Baidu"
" Kunlun Card."}};
if (xpu_error_map.find(error_type) == xpu_error_map.end()) {
return "Unknown error type!";
}
return xpu_error_map[error_type];
}
#define XPU_MALLOC(addr, num_bytes) \
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void**>(addr), num_bytes), \
XPU_SUCCESS, \
platform::errors::ResourceExhausted( \
"\n\nOut of memory error on XPU, Cannot" \
"allocate %s memory on XPU. \n\nPlease " \
"check whether there is any other process " \
"using XPU.\n", \
string::HumanReadableSize(num_bytes)))
#define DEFINE_XPU_GRAD_KERNEL(kernel_type, kernel_name, use_x_y_data) \
template <typename DeviceContext, typename T> \
class Elementwise##kernel_type##GradXPUKernel \
: public ElemwiseGradKernel<T> { \
public: \
void Compute(const framework::ExecutionContext& ctx) const override { \
ElemwiseGradKernel<T>::Compute(ctx); \
using Tensor = framework::Tensor; \
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out")); \
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X")); \
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y")); \
auto dx_dims = dout->dims(); \
auto dy_dims_untrimed = dout->dims(); \
T* dx_data = NULL; \
T* dy_data = NULL; \
const T* y_data = nullptr; \
const T* x_data = nullptr; \
T* y_broadcast = nullptr; \
if (use_x_y_data) { \
auto* x = ctx.Input<Tensor>("X"); \
auto* y = ctx.Input<Tensor>("Y"); \
y_data = y->data<T>(); \
x_data = x->data<T>(); \
} else { \
x_data = dout->data<T>(); \
y_data = dout->data<T>(); \
} \
int axis = ctx.Attr<int>("axis"); \
PADDLE_ENFORCE_GE( \
dx_dims.size(), dy_dims_untrimed.size(), \
platform::errors::InvalidArgument( \
"Rank of first input must >= rank of second input.")); \
if (dx != nullptr) { \
dx->mutable_data<T>(ctx.GetPlace()); \
dx_dims = dx->dims(); \
dx_data = dx->data<T>(); \
} \
if (dy != nullptr) { \
dy->mutable_data<T>(ctx.GetPlace()); \
dy_dims_untrimed = dy->dims(); \
dy_data = dy->data<T>(); \
} \
int pre, n, post, is_run_common_broadcast; \
if (dx_dims == dy_dims_untrimed) { \
pre = post = 1; \
n = dout->numel(); \
} else { \
axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis); \
PADDLE_ENFORCE_EQ(axis >= 0 && axis < dx_dims.size(), true, \
platform::errors::InvalidArgument( \
"Axis should be in range [0, dx_dims)")); \
auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed); \
axis = (dy_dims.size() == 0) ? dx_dims.size() : axis; \
get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post, \
&is_run_common_broadcast); \
} \
int len = pre * n * post; \
auto& dev_ctx = \
ctx.template device_context<paddle::platform::XPUDeviceContext>(); \
if (dx == nullptr) { \
XPU_MALLOC(&dx_data, len * sizeof(float)); \
} \
if (dy == nullptr) { \
XPU_MALLOC(&dy_data, len * sizeof(float)); \
} else { \
if (len != n) { \
XPU_MALLOC(&dy_data, len * sizeof(float)); \
} \
} \
if (use_x_y_data) { \
if (len != n) { \
XPU_MALLOC(&y_broadcast, len * sizeof(float)); \
int res = \
xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre, \
n, post, xpu::ElementwiseOp::ASSIGN); \
PADDLE_ENFORCE_EQ( \
res, xpu::Error_t::SUCCESS, \
platform::errors::External("XPU kernel error occur! %s", \
get_xpu_error_message(res))); \
y_data = y_broadcast; \
} \
} \
int res = xpu::elementwise_##kernel_name##_grad( \
dev_ctx.x_context(), x_data, y_data, dout->data<T>() /*out*/, \
dout->data<T>(), dx_data, dy_data, len); \
PADDLE_ENFORCE_EQ( \
res, xpu::Error_t::SUCCESS, \
platform::errors::External("XPU kernel error occur! %s", \
get_xpu_error_message(res))); \
if ((dy != nullptr) && (len != n)) { \
int res = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), \
pre, n, post, xpu::ElementwiseOp::ASSIGN); \
PADDLE_ENFORCE_EQ( \
res, xpu::Error_t::SUCCESS, \
platform::errors::External("XPU kernel error occur! %s", \
get_xpu_error_message(res))); \
dev_ctx.Wait(); \
xpu_free(dy_data); \
} \
if ((len != n || dx == nullptr || dy == nullptr) && \
!(dy != nullptr && len != n)) { \
dev_ctx.Wait(); \
} \
if (dx == nullptr) { \
xpu_free(dx_data); \
} \
if (dy == nullptr) { \
xpu_free(dy_data); \
} \
if (use_x_y_data) { \
if (len != n) { \
xpu_free(y_broadcast); \
} \
} \
} \
}
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> static std::pair<std::vector<int>, std::vector<int>> XPUDimsToBroadcastVector(
struct XPUAddFunctor { const framework::DDim& x, const framework::DDim& y) {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { std::vector<int> x_v;
return xpu::elementwise_add(ctx, x, y, z, len); std::vector<int> y_v;
int y_size = y.size();
for (int i = 0; i < y_size; ++i) {
if (x[i] == y[i]) {
x_v.push_back(y[i]);
y_v.push_back(y[i]);
continue;
}
x_v.push_back(1);
x_v.push_back(x[i]);
y_v.push_back(y[i] / x[i]);
y_v.push_back(x[i]);
} }
}; return std::make_pair(x_v, y_v);
}
template <typename T> static std::pair<std::vector<int>, std::vector<int>> XPUReducesAxisVector(
struct XPUMulFunctor { const framework::DDim& x, const framework::DDim& y) {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { std::vector<int> x_vector;
return xpu::elementwise_mul(ctx, x, y, z, len); std::vector<int> axis_v;
PADDLE_ENFORCE_GT(
x.size(), 0, platform::errors::OutOfRange("x size is less 1, x shape is ",
x.to_str()));
PADDLE_ENFORCE_GT(
y.size(), 0, platform::errors::OutOfRange("y size is less 1, y shape is ",
y.to_str()));
int y_nums = framework::product(y);
x_vector = framework::vectorize<int>(x);
if (y_nums == 1) {
for (int i = 0; i < x.size(); ++i) {
axis_v.push_back(i);
}
return std::make_pair(x_vector, axis_v);
}
int yidx = 0;
for (size_t i = 0; i < x_vector.size(); ++i) {
if (y[yidx] == 1) {
axis_v.push_back(i);
yidx++;
continue;
}
if (x_vector[i] != y[yidx]) {
axis_v.push_back(i);
continue;
}
yidx++;
} }
}; return std::make_pair(x_vector, axis_v);
}
template <typename T, typename Functor> template <typename T>
void XPUElementwise(const framework::ExecutionContext& ctx) { void XPUElementwise(
PADDLE_ENFORCE_EQ(platform::is_xpu_place(ctx.GetPlace()), true, const framework::ExecutionContext& ctx,
platform::errors::PreconditionNotMet( std::function<int(xpu::Context*, const T*, const T*, T*, int)> func) {
"This kernel only runs on XPU device."));
auto x_var = ctx.InputVar("X"); auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE_NE(x_var, nullptr, platform::errors::InvalidArgument( PADDLE_ENFORCE_NE(x_var, nullptr, platform::errors::InvalidArgument(
"Cannot get input Variable X")); "Cannot get input Variable X"));
...@@ -194,74 +95,226 @@ void XPUElementwise(const framework::ExecutionContext& ctx) { ...@@ -194,74 +95,226 @@ void XPUElementwise(const framework::ExecutionContext& ctx) {
auto* y = ctx.Input<framework::LoDTensor>("Y"); auto* y = ctx.Input<framework::LoDTensor>("Y");
auto* z = ctx.Output<framework::LoDTensor>("Out"); auto* z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace()); z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
auto x_dims = x.dims(); auto x_dims = x.dims();
auto y_dims_untrimed = y->dims(); auto y_dims = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims_untrimed.size(), int max_dim = std::max(x_dims.size(), y_dims.size());
platform::errors::InvalidArgument( int axis = ctx.Attr<int>("axis");
"Rank of first input must >= rank of second input.")); axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
PADDLE_ENFORCE_EQ(
axis >= 0 && axis < x_dims.size(), true,
platform::errors::InvalidArgument("Axis should be in range [0, x_dims)"));
auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post, is_common_broadcast;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post, &is_common_broadcast);
PADDLE_ENFORCE_NE(is_common_broadcast, 1, PADDLE_ENFORCE_GE(
platform::errors::Unimplemented( axis, 0,
"X's shape should be equal to Y's shape.")); platform::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.",
axis));
PADDLE_ENFORCE_LT(axis, max_dim,
platform::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.",
max_dim, axis));
int len = pre * n * post; std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
std::vector<int> out_dims_array(max_dim);
GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
y_dims_array.data(), out_dims_array.data(), max_dim,
axis);
framework::DDim out_dim = framework::make_ddim(out_dims_array);
const T* x_data = x.data<T>(); const T* x_data = x.data<T>();
const T* y_data = y->data<T>(); const T* y_data = y->data<T>();
T* z_data = z->data<T>(); T* z_data = z->data<T>();
T* y_broadcast = nullptr; bool need_wait = false;
framework::Tensor x_broadcast_tensor;
framework::Tensor y_broadcast_tensor;
auto& dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
int ret = xpu::SUCCESS;
// begin broadcast now
if (x.numel() != z->numel()) {
// broadcast x
std::pair<std::vector<int>, std::vector<int>> bcast_v =
XPUDimsToBroadcastVector(framework::make_ddim(x_dims_array), out_dim);
ret = xpu::broadcast<T>(
dev_ctx.x_context(), x_data,
x_broadcast_tensor.mutable_data<T>(ctx.GetPlace(), z->numel()),
bcast_v.first, bcast_v.second);
PADDLE_ENFORCE_EQ(
ret, xpu::SUCCESS,
platform::errors::External(
"XPU kernel broadcast occur error in XPUElementwise error code %d",
ret));
need_wait = true;
x_data = x_broadcast_tensor.data<T>();
}
if (y->numel() != z->numel()) {
// broadcast y
std::vector<int> bcast_x_v;
std::vector<int> bcast_y_v;
std::pair<std::vector<int>, std::vector<int>> bcast_v =
XPUDimsToBroadcastVector(framework::make_ddim(y_dims_array), out_dim);
ret = xpu::broadcast<T>(
dev_ctx.x_context(), y_data,
y_broadcast_tensor.mutable_data<T>(ctx.GetPlace(), z->numel()),
bcast_v.first, bcast_v.second);
PADDLE_ENFORCE_EQ(
ret, xpu::SUCCESS,
platform::errors::External(
"XPU kernel broadcast occur error in XPUElementwise error code %d",
ret));
need_wait = true;
y_data = y_broadcast_tensor.data<T>();
}
int len = z->numel();
ret = func(dev_ctx.x_context(), x_data, y_data, z_data, len);
PADDLE_ENFORCE_EQ(
ret, xpu::SUCCESS,
platform::errors::External(
"XPU kernel Elementwise occur error in XPUElementwise error code ",
ret));
if (need_wait && dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
}
}
template <typename T>
void XPUElementwiseGrad(const framework::ExecutionContext& ctx,
std::function<int(xpu::Context*, const T*, const T*,
const T*, const T*, T*, T*, int len)>
func,
bool use_x_y_data) {
auto* x = ctx.Input<framework::Tensor>("X");
auto* y = ctx.Input<framework::Tensor>("Y");
auto* dz = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* z = dz;
auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
const framework::DDim& x_dims = x->dims();
const framework::DDim& y_dims = y->dims();
int max_dim = std::max(x_dims.size(), y_dims.size());
axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
PADDLE_ENFORCE_GE(
axis, 0,
platform::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.",
axis));
PADDLE_ENFORCE_LT(axis, max_dim,
platform::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.",
max_dim, axis));
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
std::vector<int> out_dims_array(max_dim);
GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
y_dims_array.data(), out_dims_array.data(), max_dim,
axis);
framework::DDim out_dim = framework::make_ddim(out_dims_array);
int len = framework::product(out_dim);
framework::Tensor x_broadcast_tensor;
framework::Tensor y_broadcast_tensor;
framework::Tensor dx_local_tensor;
framework::Tensor dy_local_tensor;
bool need_wait = false;
const T* x_data = use_x_y_data ? x->data<T>() : z->data<T>();
const T* y_data = use_x_y_data ? y->data<T>() : z->data<T>();
const T* z_data = z->data<T>();
const T* dz_data = (const T*)dz->data<T>();
bool dx_need_reduce = (dx != nullptr) && (dx->numel() != len);
bool dy_need_reduce = (dy != nullptr) && (dy->numel() != len);
T* dx_data = ((dx == nullptr) || dx_need_reduce)
? (dx_local_tensor.mutable_data<T>(ctx.GetPlace(), len))
: (dx->mutable_data<T>(ctx.GetPlace()));
T* dy_data = ((dy == nullptr) || dy_need_reduce)
? (dy_local_tensor.mutable_data<T>(ctx.GetPlace(), len))
: (dy->mutable_data<T>(ctx.GetPlace()));
int ret = xpu::SUCCESS;
auto& dev_ctx = auto& dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>(); ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) { if (use_x_y_data && x->numel() != len) {
if (std::is_same<Functor, XPUAddFunctor<T>>::value) { std::vector<int> bcast_x_v;
int res = xpu::matrix_vector_add(dev_ctx.x_context(), x_data, y_data, std::vector<int> bcast_y_v;
z_data, pre, n); std::pair<std::vector<int>, std::vector<int>> bcast_v =
PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, XPUDimsToBroadcastVector(framework::make_ddim(x_dims_array), out_dim);
platform::errors::External("XPU kernel error occur! %s", ret = xpu::broadcast<T>(
get_xpu_error_message(res))); dev_ctx.x_context(), x_data,
return; x_broadcast_tensor.mutable_data<T>(ctx.GetPlace(), len), bcast_v.first,
} bcast_v.second);
if (std::is_same<Functor, XPUMulFunctor<T>>::value) { PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS,
int res = xpu::matrix_vector_mul(dev_ctx.x_context(), x_data, y_data, platform::errors::External(
z_data, pre, n); "XPU kernel broadcast error occur! %d", ret));
PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, need_wait = true;
platform::errors::External("XPU kernel error occur! %s", x_data = x_broadcast_tensor.data<T>();
get_xpu_error_message(res))); }
return;
} if (use_x_y_data && y->numel() != len) {
// broadcast y
std::vector<int> bcast_x_v;
std::vector<int> bcast_y_v;
std::pair<std::vector<int>, std::vector<int>> bcast_v =
XPUDimsToBroadcastVector(framework::make_ddim(y_dims_array), out_dim);
ret = xpu::broadcast<T>(
dev_ctx.x_context(), y_data,
y_broadcast_tensor.mutable_data<T>(ctx.GetPlace(), len), bcast_v.first,
bcast_v.second);
PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS,
platform::errors::External(
"XPU kernel broadcast error occur! %d", ret));
need_wait = true;
y_data = y_broadcast_tensor.data<T>();
} }
if (pre != 1 || post != 1) { ret = func(dev_ctx.x_context(), x_data, y_data, z_data, dz_data, dx_data,
XPU_MALLOC(&y_broadcast, len * sizeof(T)); dy_data, len);
int res = xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre, PADDLE_ENFORCE_EQ(ret, xpu::SUCCESS, platform::errors::External(
n, post, xpu::ElementwiseOp::ASSIGN); "XPU kernel binary occur error in "
PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, "XPUElementwiseGrad, error code %d",
platform::errors::External("XPU kernel error occur! %s", ret));
get_xpu_error_message(res)));
y_data = y_broadcast; if (dx_need_reduce) {
const framework::DDim& dx_dims = dx->dims();
std::pair<std::vector<int>, std::vector<int>> reduce_v =
XPUReducesAxisVector(out_dim, dx_dims);
ret = xpu::reduce_sum(dev_ctx.x_context(), dx_data,
dx->mutable_data<T>(ctx.GetPlace()), reduce_v.first,
reduce_v.second);
PADDLE_ENFORCE_EQ(
ret, xpu::SUCCESS,
platform::errors::External("XPU kernel reduce_sum occur error in "
"XPUElementwiseGrad, error code %d",
ret));
need_wait = true;
} }
Functor functor; if (dy_need_reduce) {
int res = functor(dev_ctx.x_context(), x_data, y_data, z_data, len); const framework::DDim& dy_dims = dy->dims();
PADDLE_ENFORCE_EQ(res, xpu::Error_t::SUCCESS, std::pair<std::vector<int>, std::vector<int>> reduce_v =
platform::errors::External("XPU kernel error occur! %s", XPUReducesAxisVector(out_dim, dy_dims);
get_xpu_error_message(res))); ret = xpu::reduce_sum(dev_ctx.x_context(), dy_data,
dy->mutable_data<T>(ctx.GetPlace()), reduce_v.first,
reduce_v.second);
PADDLE_ENFORCE_EQ(
ret, xpu::SUCCESS,
platform::errors::External("XPU kernel reduce_sum occur error in "
"XPUElementwiseGrad, error code %d",
ret));
need_wait = true;
}
if (pre != 1 || post != 1) { if (need_wait && dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait(); dev_ctx.Wait();
xpu_free(y_broadcast);
} }
} }
......
...@@ -19,6 +19,9 @@ limitations under the License. */ ...@@ -19,6 +19,9 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "xpu/refactor/math.h"
#include "xpu/refactor/nn.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -41,11 +44,13 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> { ...@@ -41,11 +44,13 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> {
loss->mutable_data<T>(context.GetPlace()); loss->mutable_data<T>(context.GetPlace());
const int n = SizeToAxis(axis, logits->dims()); const int n = SizeToAxis(axis, logits->dims());
const int d = SizeFromAxis(axis, logits->dims()); const int d = SizeFromAxis(axis, logits->dims());
std::vector<int> logits_dims = framework::vectorize<int>(logits->dims());
// softmax // softmax
auto& dev_ctx = auto& dev_ctx =
context.template device_context<platform::XPUDeviceContext>(); context.template device_context<platform::XPUDeviceContext>();
int r = xpu::softmax2d_forward(dev_ctx.x_context(), logits->data<float>(), int r = xpu::softmax(dev_ctx.x_context(), logits->data<float>(),
softmax->data<float>(), n, d); softmax->data<float>(), logits_dims, axis);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS, r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU kernel error. Softmax2d_forward " platform::errors::External("XPU kernel error. Softmax2d_forward "
...@@ -55,44 +60,35 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> { ...@@ -55,44 +60,35 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> {
auto ignore_index = context.Attr<int>("ignore_index"); auto ignore_index = context.Attr<int>("ignore_index");
const bool soft_label = context.Attr<bool>("soft_label"); const bool soft_label = context.Attr<bool>("soft_label");
if (soft_label) { if (soft_label) {
PADDLE_THROW(platform::errors::InvalidArgument( r = xpu::soft_cross_entropy<float>(
"XPU only support soft_label == false for now!")); dev_ctx.x_context(), softmax->data<float>(), labels->data<float>(),
loss->data<float>(), n, d);
PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU kernel error. soft_cross_entropy "
"execution not succeed, error code=%d",
r));
} else { } else {
auto* p_labels = labels->data<int64_t>(); Tensor labels_int32;
int64_t* labels_int64_host = labels_int32.mutable_data<int32_t>(context.GetPlace(), labels->numel());
reinterpret_cast<int64_t*>(std::malloc(n * sizeof(int64_t))); r = xpu::cast_v2<int64_t, int32_t>(
int* labels_int32_host = dev_ctx.x_context(), labels->data<int64_t>(),
reinterpret_cast<int*>(std::malloc(n * sizeof(int))); labels_int32.data<int32_t>(), labels->numel());
int* labels_int32_device = NULL; PADDLE_ENFORCE_EQ(
int ret = xpu_malloc(reinterpret_cast<void**>(&labels_int32_device), r, xpu::Error_t::SUCCESS,
n * sizeof(int)); platform::errors::External("XPU kernel error. cast_v2 "
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, "execution not succeed, error code=%d",
platform::errors::External( r));
"XPU API return wrong value[%d], please check "
"where Baidu Kunlun Card is properly installed.", r = xpu::hard_cross_entropy<float, int32_t>(
ret)); dev_ctx.x_context(), softmax->data<float>(),
dev_ctx.Wait(); labels_int32.data<int32_t>(), loss->data<float>(), nullptr, n, d,
memory::Copy(platform::CPUPlace(), labels_int64_host, ignore_index);
BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
p_labels, n * sizeof(int64_t));
for (int i = 0; i < n; ++i) {
labels_int32_host[i] = labels_int64_host[i];
}
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
labels_int32_device, platform::CPUPlace(), labels_int32_host,
n * sizeof(int));
int r = xpu::cross_entropy_forward(
dev_ctx.x_context(), n, d, softmax->data<float>(),
labels_int32_device, loss->data<float>(), nullptr, ignore_index);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
r, xpu::Error_t::SUCCESS, r, xpu::Error_t::SUCCESS,
platform::errors::External("XPU kernel error. Cross_entropy_forward " platform::errors::External("XPU kernel error. hard_cross_entropy "
"execution not succeed, error code=%d", "execution not succeed, error code=%d",
r)); r));
dev_ctx.Wait();
std::free(labels_int32_host);
std::free(labels_int64_host);
xpu_free(labels_int32_device);
} }
} }
}; };
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
# Switch paddle to static-graph mode at import time; net() below builds a
# fluid Program and runs it through an Executor.
paddle.enable_static()
class TestXPUElementwiseOpBase(object):
def setUp(self, op_type):
self.op_type = op_type
self.attrs = {'use_xpu': True}
self.is_common_broadcast = False
self.is_x_size_less_than_y = False
self.grad_implemented = False
self.y_grad_implemented = True
self.dtype = np.float32
self.__class__.op_type = self.op_type
self.__class__.use_xpu = True
self.__class__.dtype = self.dtype
def net(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
x = fluid.layers.data(
name='X', shape=self.inputs['X'].shape, dtype=self.dtype)
y = fluid.layers.data(
name='Y', shape=self.inputs['Y'].shape, dtype=self.dtype)
op = getattr(fluid.layers, self.op_type)
z = op(x, y)
exe = fluid.Executor(place)
z_value = exe.run(feed=self.inputs, fetch_list=[z.name])
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
if not self.is_common_broadcast and not self.is_x_size_less_than_y:
self.check_output_with_place(place, atol=1e-3)
else:
with self.assertRaises(BaseException):
self.net(place)
def _check_grad_xpu_helper(self,
inputs_to_check,
output_names,
no_grad_set=None,
max_relative_error=0.01):
if self.grad_implemented and not self.is_common_broadcast \
and not self.is_x_size_less_than_y:
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place,
inputs_to_check,
output_names,
no_grad_set=no_grad_set,
max_relative_error=max_relative_error)
def test_check_grad_normal(self):
self._check_grad_xpu_helper(['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
self._check_grad_xpu_helper(['Y'], 'Out', set("X"))
def test_check_grad_ingore_y(self):
if self.y_grad_implemented:
self._check_grad_xpu_helper(['X'], 'Out', set("Y"))
def init_axis(self):
self.axis = -1
def make_input(self, x_shape=[13, 17], y_shape=[13, 17]):
self.inputs = {
'X': np.random.uniform(0.1, 1, x_shape).astype(self.dtype),
'Y': np.random.uniform(0.1, 1, y_shape).astype(self.dtype)
}
def reshape_input(self, x_shape=None, y_shape=None):
if x_shape is None:
x = self.inputs['X']
else:
x = self.inputs['X'].reshape(x_shape)
if y_shape is None:
y = self.inputs['Y']
else:
y = self.inputs['Y'].reshape(y_shape)
return x, y
def make_output(self, x_shape=None, y_shape=None):
pass
...@@ -13,18 +13,21 @@ ...@@ -13,18 +13,21 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import numpy as np
import sys import sys
sys.path.append("..") sys.path.append("..")
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard from paddle.fluid import compiler, Program, program_guard
paddle.enable_static()
class TestElementwiseAddOp(OpTest): @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp(XPUOpTest):
def init_kernel_type(self): def init_kernel_type(self):
self.use_mkldnn = False self.use_mkldnn = False
...@@ -34,6 +37,7 @@ class TestElementwiseAddOp(OpTest): ...@@ -34,6 +37,7 @@ class TestElementwiseAddOp(OpTest):
self.init_input_output() self.init_input_output()
self.init_kernel_type() self.init_kernel_type()
self.init_axis() self.init_axis()
self.use_xpu = True
self.inputs = { self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x), 'X': OpTest.np_dtype_to_fluid_dtype(self.x),
...@@ -43,80 +47,33 @@ class TestElementwiseAddOp(OpTest): ...@@ -43,80 +47,33 @@ class TestElementwiseAddOp(OpTest):
self.outputs = {'Out': self.out} self.outputs = {'Out': self.out}
def test_check_output(self): def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode if paddle.is_compiled_with_xpu():
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_normal(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['Y'],
'Out',
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X'],
'Out',
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_dtype(self):
self.dtype = np.float64
def init_axis(self):
self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUElementwiseAddOp(OpTest):
def setUp(self):
self.op_type = "elementwise_add"
self.init_dtype()
self.init_input_output()
self.init_axis()
self.inputs = {'X': self.x, 'Y': self.y}
self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True}
self.outputs = {'Out': self.out}
def test_check_output(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_output_with_place(place) self.check_output_with_place(place)
def test_check_grad_normal(self): def test_check_grad_normal(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X', 'Y'], 'Out') self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.006)
def test_check_grad_ingore_x(self): def test_check_grad_ingore_x(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['Y'], 'Out') self.check_grad_with_place(
place, ['Y'],
'Out',
no_grad_set=set("X"),
max_relative_error=0.006)
def test_check_grad_ingore_y(self): def test_check_grad_ingore_y(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out') self.check_grad_with_place(
place, ['X'],
'Out',
no_grad_set=set("Y"),
max_relative_error=0.006)
def init_input_output(self): def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
...@@ -130,6 +87,8 @@ class TestXPUElementwiseAddOp(OpTest): ...@@ -130,6 +87,8 @@ class TestXPUElementwiseAddOp(OpTest):
self.axis = -1 self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
@skip_check_grad_ci( @skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.") reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_scalar(TestElementwiseAddOp): class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
...@@ -139,6 +98,8 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp): ...@@ -139,6 +98,8 @@ class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
self.out = self.x + self.y self.out = self.x + self.y
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
@skip_check_grad_ci( @skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1,1) to test broadcast.") reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
...@@ -148,6 +109,8 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): ...@@ -148,6 +109,8 @@ class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
self.out = self.x + self.y self.out = self.x + self.y
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_Vector(TestElementwiseAddOp): class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.random((100, )).astype(self.dtype) self.x = np.random.random((100, )).astype(self.dtype)
...@@ -155,6 +118,8 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp): ...@@ -155,6 +118,8 @@ class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
self.out = np.add(self.x, self.y) self.out = np.add(self.x, self.y)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.x = np.random.rand(100, 2, 3).astype(self.dtype)
...@@ -165,6 +130,8 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): ...@@ -165,6 +130,8 @@ class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
self.axis = 0 self.axis = 0
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.x = np.random.rand(2, 100, 3).astype(self.dtype)
...@@ -175,6 +142,8 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): ...@@ -175,6 +142,8 @@ class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
self.axis = 1 self.axis = 1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.x = np.random.rand(2, 3, 100).astype(self.dtype)
...@@ -182,6 +151,8 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): ...@@ -182,6 +151,8 @@ class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
self.out = self.x + self.y.reshape(1, 1, 100) self.out = self.x + self.y.reshape(1, 1, 100)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
...@@ -192,6 +163,8 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): ...@@ -192,6 +163,8 @@ class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
self.axis = 1 self.axis = 1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
...@@ -202,6 +175,8 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): ...@@ -202,6 +175,8 @@ class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
self.axis = 0 self.axis = 0
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(10, 3, 12).astype(self.dtype) self.x = np.random.rand(10, 3, 12).astype(self.dtype)
...@@ -209,6 +184,8 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): ...@@ -209,6 +184,8 @@ class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp):
self.out = self.x + self.y self.out = self.x + self.y
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype)
...@@ -216,6 +193,8 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): ...@@ -216,6 +193,8 @@ class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp):
self.out = self.x + self.y self.out = self.x + self.y
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype)
...@@ -223,6 +202,8 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): ...@@ -223,6 +202,8 @@ class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp):
self.out = self.x + self.y self.out = self.x + self.y
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 10, 12).astype(self.dtype) self.x = np.random.rand(2, 10, 12).astype(self.dtype)
...@@ -233,6 +214,8 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): ...@@ -233,6 +214,8 @@ class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
self.axis = 1 self.axis = 1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
@skip_check_grad_ci( @skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.") reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
...@@ -245,6 +228,8 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): ...@@ -245,6 +228,8 @@ class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
self.axis = 1 self.axis = 1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.x = np.random.rand(100, 2, 3).astype(self.dtype)
...@@ -255,6 +240,8 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): ...@@ -255,6 +240,8 @@ class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
self.axis = -1 self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.x = np.random.rand(2, 3, 100).astype(self.dtype)
...@@ -265,6 +252,8 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): ...@@ -265,6 +252,8 @@ class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp):
self.axis = -1 self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype)
...@@ -275,6 +264,8 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): ...@@ -275,6 +264,8 @@ class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp):
self.axis = -1 self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp):
def init_input_output(self): def init_input_output(self):
self.x = np.random.rand(10, 12).astype(self.dtype) self.x = np.random.rand(10, 12).astype(self.dtype)
...@@ -285,14 +276,16 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): ...@@ -285,14 +276,16 @@ class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp):
self.axis = 2 self.axis = 2
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseAddOpError(unittest.TestCase): class TestElementwiseAddOpError(unittest.TestCase):
def test_errors(self): def test_errors(self):
with program_guard(Program(), Program()): with program_guard(Program(), Program()):
# the input of elementwise_add must be Variable. # the input of elementwise_add must be Variable.
x1 = fluid.create_lod_tensor( x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0))
y1 = fluid.create_lod_tensor( y1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0))
self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1)
# the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64
...@@ -302,6 +295,8 @@ class TestElementwiseAddOpError(unittest.TestCase): ...@@ -302,6 +295,8 @@ class TestElementwiseAddOpError(unittest.TestCase):
self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2) self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestAddOp(unittest.TestCase): class TestAddOp(unittest.TestCase):
def test_name(self): def test_name(self):
with fluid.program_guard(fluid.Program()): with fluid.program_guard(fluid.Program()):
...@@ -324,7 +319,7 @@ class TestAddOp(unittest.TestCase): ...@@ -324,7 +319,7 @@ class TestAddOp(unittest.TestCase):
y = fluid.data(name="y", shape=[3], dtype='float32') y = fluid.data(name="y", shape=[3], dtype='float32')
z = paddle.add(x, y) z = paddle.add(x, y)
place = fluid.CPUPlace() place = fluid.XPUPlace(0)
exe = fluid.Executor(place) exe = fluid.Executor(place)
z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) z_value = exe.run(feed=gen_data(), fetch_list=[z.name])
z_expected = np.array([3., 8., 6.]) z_expected = np.array([3., 8., 6.])
...@@ -332,8 +327,8 @@ class TestAddOp(unittest.TestCase): ...@@ -332,8 +327,8 @@ class TestAddOp(unittest.TestCase):
def test_dygraph(self): def test_dygraph(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
np_x = np.array([2, 3, 4]).astype('float64') np_x = np.array([2, 3, 4]).astype('float32')
np_y = np.array([1, 5, 2]).astype('float64') np_y = np.array([1, 5, 2]).astype('float32')
x = fluid.dygraph.to_variable(np_x) x = fluid.dygraph.to_variable(np_x)
y = fluid.dygraph.to_variable(np_y) y = fluid.dygraph.to_variable(np_y)
z = paddle.add(x, y) z = paddle.add(x, y)
......
...@@ -17,121 +17,233 @@ import unittest ...@@ -17,121 +17,233 @@ import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci from op_test import OpTest, skip_check_grad_ci
from elementwise import TestXPUElementwiseOpBase from op_test_xpu import XPUOpTest
paddle.enable_static() paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseDivOp(OpTest, TestXPUElementwiseOpBase): class ElementwiseDivOp(XPUOpTest):
def setUp(self): def setUp(self):
TestXPUElementwiseOpBase.setUp(self, "elementwise_div") self.op_type = "elementwise_div"
self.make_input() self.dtype = np.float32
self.make_output() self.init_dtype()
self.use_xpu = True
def make_output(self, x_shape=None, y_shape=None): """ Warning
x, y = self.reshape_input(x_shape, y_shape) CPU gradient check error!
self.outputs = {'Out': np.divide(x, y)} 'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
"""
self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad_normal(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.05)
def test_check_grad_ingore_x(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
max_relative_error=0.05,
no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
max_relative_error=0.05,
no_grad_set=set('Y'))
def init_dtype(self):
pass
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseDivOp_scalar(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [20, 3, 4]).astype(np.float32),
'Y': np.random.uniform(0.1, 1, [1]).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_scalar(TestXPUElementwiseDivOp): class TestElementwiseDivOp_Vector(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_scalar, self).setUp() self.op_type = "elementwise_div"
self.grad_implemented = False self.inputs = {
self.make_input([20, 3, 4], [1]) 'X': np.random.uniform(0.1, 1, [100]).astype("float32"),
self.make_output() 'Y': np.random.uniform(0.1, 1, [100]).astype("float32")
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_Vector(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_Vector, self).setUp() self.op_type = "elementwise_div"
self.make_input([100, ], [100, ]) self.inputs = {
self.make_output() 'X': np.random.uniform(0.1, 1, [100, 3, 4]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [100]).astype("float32")
}
self.attrs = {'axis': 0}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_0(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_0, self).setUp() self.op_type = "elementwise_div"
self.attrs['axis'] = 0 self.inputs = {
self.make_input([100, 3, 4], [100, ]) 'X': np.random.uniform(0.1, 1, [2, 100, 4]).astype("float32"),
self.make_output(y_shape=[100, 1, 1]) 'Y': np.random.uniform(0.1, 1, [100]).astype("float32")
}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_1(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_1, self).setUp() self.op_type = "elementwise_div"
self.attrs['axis'] = 1 self.inputs = {
self.make_input([2, 100, 4], [100, ]) 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float32"),
self.make_output(y_shape=[1, 100, 1]) 'Y': np.random.uniform(0.1, 1, [100]).astype("float32")
}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_2(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_2, self).setUp() self.op_type = "elementwise_div"
self.make_input([2, 3, 100], [100, ]) self.inputs = {
self.make_output(y_shape=[1, 1, 100]) 'X': np.random.uniform(0.1, 1, [2, 10, 12, 5]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [10, 12]).astype("float32")
}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 10, 12, 1))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_3(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_4(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_3, self).setUp() self.op_type = "elementwise_div"
self.attrs['axis'] = 1 self.inputs = {
self.make_input([2, 10, 12, 5], [10, 12]) 'X': np.random.uniform(0.1, 1, [2, 3, 50]).astype("float32"),
self.make_output(y_shape=[1, 10, 12, 1]) 'Y': np.random.uniform(0.1, 1, [2, 1, 50]).astype("float32")
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_4(TestXPUElementwiseDivOp): class TestElementwiseDivOp_broadcast_5(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_4, self).setUp() self.op_type = "elementwise_div"
self.is_common_broadcast = True self.inputs = {
self.make_input([2, 3, 50], [2, 1, 50]) 'X': np.random.uniform(0.1, 1, [2, 3, 4, 20]).astype("float32"),
self.make_output() 'Y': np.random.uniform(0.1, 1, [2, 3, 1, 20]).astype("float32")
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_broadcast_5(TestXPUElementwiseDivOp): class TestElementwiseDivOp_commonuse_1(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_broadcast_5, self).setUp() self.op_type = "elementwise_div"
self.is_common_broadcast = True self.inputs = {
self.make_input([2, 3, 4, 20], [2, 3, 1, 20]) 'X': np.random.uniform(0.1, 1, [2, 3, 100]).astype("float32"),
self.make_output() 'Y': np.random.uniform(0.1, 1, [1, 1, 100]).astype("float32"),
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_commonuse_1(TestXPUElementwiseDivOp): class TestElementwiseDivOp_commonuse_2(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_commonuse_1, self).setUp() self.op_type = "elementwise_div"
self.is_common_broadcast = True self.inputs = {
self.make_input([2, 3, 100], [1, 1, 100]) 'X': np.random.uniform(0.1, 1, [30, 3, 1, 5]).astype("float32"),
self.make_output() 'Y': np.random.uniform(0.1, 1, [30, 1, 4, 1]).astype("float32"),
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseDivOp_xsize_lessthan_ysize(TestXPUElementwiseDivOp): class TestElementwiseDivOp_xsize_lessthan_ysize(ElementwiseDivOp):
def setUp(self): def setUp(self):
super(TestElementwiseDivOp_xsize_lessthan_ysize, self).setUp() self.op_type = "elementwise_div"
self.is_x_size_less_than_y = True self.inputs = {
self.attrs['axis'] = 2 'X': np.random.uniform(0.1, 1, [10, 12]).astype("float32"),
self.make_input([10, 12], [2, 3, 10, 12]) 'Y': np.random.uniform(0.1, 1, [2, 3, 10, 12]).astype("float32"),
self.make_output(x_shape=[1, 1, 10, 12]) }
self.attrs = {'axis': 2}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseDivBroadcast(unittest.TestCase):
    """Divides a Python scalar by a fed variable and compares with NumPy.

    The variable is declared with ``None`` for every dimension except the
    second one, so the concrete shape is only known when data is fed.
    """

    def test_shape_with_batch_sizes(self):
        declared_shape = [None, 3, None, None]
        with fluid.program_guard(fluid.Program()):
            x_var = fluid.data(name='x', dtype='float32', shape=declared_shape)
            # The scalar sits on the left-hand side of the division.
            numerator = 2.
            quotient = numerator / x_var

            executor = fluid.Executor(fluid.XPUPlace(0))
            feed_x = np.random.uniform(0.1, 0.6,
                                       (1, 3, 32, 32)).astype("float32")
            fetched = executor.run(feed={'x': feed_x}, fetch_list=[quotient])
            # Exactly one tensor was requested, so exactly one comes back.
            out_result, = fetched

            # The device result must equal the NumPy quotient element-wise.
            all_equal = (out_result == (2 / feed_x)).all()
            self.assertEqual(all_equal, True)
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
paddle.enable_static()
import random
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseModOp(XPUOpTest):
    """Output check for the ``elementwise_floordiv`` operator on an XPU place.

    NOTE(review): the class name says "Mod" although ``op_type`` is
    ``elementwise_floordiv``; the name is kept so test ids stay stable.

    Subclasses adapt the case by overriding the ``init_*`` hooks below.
    """

    def setUp(self):
        self.op_type = "elementwise_floordiv"
        self.dtype = np.float32
        self.axis = -1

        # Hooks run in a fixed order; init_input_output reads self.dtype,
        # so init_dtype has to come first.
        self.init_dtype()
        self.init_input_output()
        self.init_kernel_type()
        self.init_axis()

        to_fluid = OpTest.np_dtype_to_fluid_dtype
        self.inputs = {'X': to_fluid(self.x), 'Y': to_fluid(self.y)}
        self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.out}

    def init_dtype(self):
        """Hook for changing ``self.dtype``; the base case keeps float32."""
        pass

    def init_input_output(self):
        """Create two same-shaped operands and the NumPy reference result."""
        shape = [10, 10]
        self.x = np.random.uniform(0, 10000, shape).astype(self.dtype)
        self.y = np.random.uniform(0, 1000, shape).astype(self.dtype)
        self.out = np.floor_divide(self.x, self.y)

    def init_kernel_type(self):
        """Set the ``use_mkldnn`` attribute value used by ``setUp``."""
        self.use_mkldnn = False

    def init_axis(self):
        """Hook for changing ``self.axis``; the base case keeps -1."""
        pass

    def test_check_output(self):
        # Nothing to check when the build has no XPU support.
        if not paddle.is_compiled_with_xpu():
            return
        self.check_output_with_place(paddle.XPUPlace(0))
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseModOp_scalar(TestElementwiseModOp):
    """Floor division of a (2, 3, 4) tensor by a one-element divisor."""

    def init_input_output(self):
        # Random integer magnitudes for the dividend and the divisor.
        x_magnitude = random.randint(0, 100000000)
        y_magnitude = random.randint(1, 100000000)

        dividend = np.random.rand(2, 3, 4) * x_magnitude
        # Adding 1 keeps the divisor at or above 1 before the dtype cast.
        divisor = np.random.rand(1) * y_magnitude + 1

        self.x = dividend.astype(self.dtype)
        self.y = divisor.astype(self.dtype)
        self.out = np.floor_divide(self.x, self.y)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseModOpInverse(TestElementwiseModOp):
    """Floor division where X has lower rank ([10]) than Y ([10, 10])."""

    def init_input_output(self):
        dividend = np.random.uniform(0, 10000, [10])
        divisor = np.random.uniform(0, 1000, [10, 10])
        self.x = dividend.astype(self.dtype)
        self.y = divisor.astype(self.dtype)
        # NumPy broadcasts the 1-D dividend against the 2-D divisor.
        self.out = np.floor_divide(self.x, self.y)
# Run every test case defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -16,113 +16,163 @@ sys.path.append("..") ...@@ -16,113 +16,163 @@ sys.path.append("..")
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest, skip_check_grad_ci from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
import paddle import paddle
from elementwise import TestXPUElementwiseOpBase
paddle.enable_static() paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseOp(OpTest, TestXPUElementwiseOpBase): class TestElementwiseOp(XPUOpTest):
def setUp(self): def setUp(self):
TestXPUElementwiseOpBase.setUp(self, "elementwise_max") self.use_xpu = True
self.make_input() self.op_type = "elementwise_max"
self.make_output() # If x and y have the same value, the max() is not differentiable.
# So we generate test data by the following method
def make_input(self, x_shape=[13, 17], y_shape=[13, 17], idx_list=None): # to avoid them being too close to each other.
x = np.random.random(x_shape).astype(self.dtype) x = np.random.uniform(0.1, 1, [13, 17]).astype("float32")
sgn = np.random.choice([-1, 1], y_shape).astype(self.dtype) sgn = np.random.choice([-1, 1], [13, 17]).astype("float32")
if idx_list is None: y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float32")
y = x + sgn * np.random.uniform(0.1, 1, y_shape).astype(self.dtype)
else:
x_temp = x
for idx in idx_list:
x_temp = np.take(x_temp, [0], axis=idx)
sgn = sgn.reshape(x_temp.shape)
y = x_temp + sgn * np.random.uniform(0.1, 1, x_temp.shape)
y = y.reshape(y_shape).astype(self.dtype)
self.inputs = {'X': x, 'Y': y} self.inputs = {'X': x, 'Y': y}
self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
def make_output(self, x_shape=None, y_shape=None):
x, y = self.reshape_input(x_shape, y_shape) def test_check_output(self):
self.outputs = {'Out': np.maximum(x, y)} if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad_normal(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
max_relative_error=0.006,
no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
max_relative_error=0.006,
no_grad_set=set('Y'))
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_scalar(TestXPUElementwiseOp): class TestElementwiseMaxOp_scalar(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_scalar, self).setUp() self.op_type = "elementwise_max"
self.make_input([2, 3, 20], [1]) x = np.random.random_integers(-5, 5, [2, 3, 20]).astype("float32")
self.make_output() y = np.array([0.5]).astype("float32")
self.grad_implemented = False self.inputs = {'X': x, 'Y': y}
self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_Vector(TestXPUElementwiseOp): class TestElementwiseMaxOp_Vector(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_Vector, self).setUp() self.op_type = "elementwise_max"
self.make_input([100, ], [100, ]) x = np.random.random((100, )).astype("float32")
self.make_output() sgn = np.random.choice([-1, 1], (100, )).astype("float32")
y = x + sgn * np.random.uniform(0.1, 1, (100, )).astype("float32")
self.inputs = {'X': x, 'Y': y}
self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_broadcast_0(TestXPUElementwiseOp): class TestElementwiseMaxOp_broadcast_0(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_broadcast_0, self).setUp() self.op_type = "elementwise_max"
self.attrs['axis'] = 0 x = np.random.uniform(0.5, 1, (100, 5, 2)).astype(np.float32)
self.make_input([100, 5, 2], [100, ], [1, 2]) sgn = np.random.choice([-1, 1], (100, )).astype(np.float32)
self.make_output(y_shape=[100, 1, 1]) y = x[:, 0, 0] + sgn * \
np.random.uniform(1, 2, (100, )).astype(np.float32)
self.inputs = {'X': x, 'Y': y}
self.attrs = {'axis': 0}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), self.outputs = {
"core is not compiled with XPU") 'Out':
class TestElementwiseMaxOp_broadcast_1(TestXPUElementwiseOp): np.maximum(self.inputs['X'], self.inputs['Y'].reshape(100, 1, 1))
def setUp(self): }
super(TestElementwiseMaxOp_broadcast_1, self).setUp()
self.attrs['axis'] = 1
self.make_input([2, 100, 3], [100, ], [0, 2])
self.make_output(y_shape=[1, 100, 1])
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_broadcast_2(TestXPUElementwiseOp): class TestElementwiseMaxOp_broadcast_1(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_broadcast_2, self).setUp() self.op_type = "elementwise_max"
self.make_input([1, 3, 100], [100, ], [0, 1]) x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(np.float32)
self.make_output(y_shape=[1, 1, 100]) sgn = np.random.choice([-1, 1], (100, )).astype(np.float32)
y = x[0, :, 0] + sgn * \
np.random.uniform(1, 2, (100, )).astype(np.float32)
self.inputs = {'X': x, 'Y': y}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 100, 1))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_broadcast_3(TestXPUElementwiseOp): class TestElementwiseMaxOp_broadcast_2(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_broadcast_3, self).setUp() self.op_type = "elementwise_max"
self.attrs['axis'] = 1 x = np.random.uniform(0.5, 1, (1, 3, 100)).astype(np.float32)
self.make_input([2, 50, 2, 1], [50, 2], [0, 3]) sgn = np.random.choice([-1, 1], (100, )).astype(np.float32)
self.make_output(y_shape=[1, 50, 2, 1]) y = x[0, 0, :] + sgn * \
np.random.uniform(1, 2, (100, )).astype(np.float32)
self.inputs = {'X': x, 'Y': y}
self.outputs = {
'Out':
np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 100))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_broadcast_4(TestXPUElementwiseOp): class TestElementwiseMaxOp_broadcast_3(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_broadcast_4, self).setUp() self.op_type = "elementwise_max"
self.make_input([2, 3, 4, 5], [2, 3, 1, 5]) x = np.random.uniform(0.5, 1, (2, 50, 2, 1)).astype(np.float32)
self.make_output() sgn = np.random.choice([-1, 1], (50, 2)).astype(np.float32)
y = x[0, :, :, 0] + sgn * \
np.random.uniform(1, 2, (50, 2)).astype(np.float32)
self.inputs = {'X': x, 'Y': y}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.maximum(self.inputs['X'], self.inputs['Y'].reshape(1, 50, 2, 1))
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMaxOp_broadcast_5(TestXPUElementwiseOp): class TestElementwiseMaxOp_broadcast_4(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseMaxOp_broadcast_5, self).setUp() self.op_type = "elementwise_max"
self.make_input([2, 3, 100], [1, 1, 100]) x = np.random.uniform(0.5, 1, (2, 3, 4, 5)).astype(np.float32)
self.make_output() sgn = np.random.choice([-1, 1], (2, 3, 1, 5)).astype(np.float32)
y = x + sgn * \
np.random.uniform(1, 2, (2, 3, 1, 5)).astype(np.float32)
self.inputs = {'X': x, 'Y': y}
self.outputs = {'Out': np.maximum(self.inputs['X'], self.inputs['Y'])}
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
import unittest
import numpy as np
from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
import paddle
from op_test_xpu import XPUOpTest
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseOp(XPUOpTest):
    """Base case for ``elementwise_min`` on an XPU place.

    Checks the forward output and the gradients with respect to X and Y,
    both together and with one of the two excluded.
    """

    def setUp(self):
        self.op_type = "elementwise_min"
        shape = [13, 17]
        # min() is not differentiable where x == y, so y is obtained by
        # moving x up or down by at least 0.1 to keep the two apart.
        x = np.random.uniform(0.1, 1, shape).astype("float32")
        sgn = np.random.choice([-1, 1], shape).astype("float32")
        gap = np.random.uniform(0.1, 1, shape).astype("float32")
        y = x + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.outputs = {'Out': np.minimum(x, y)}

    def test_check_output(self):
        # Nothing to check when the build has no XPU support.
        if not paddle.is_compiled_with_xpu():
            return
        self.check_output_with_place(paddle.XPUPlace(0))

    def test_check_grad_normal(self):
        # Gradients with respect to both inputs.
        if not paddle.is_compiled_with_xpu():
            return
        self.check_grad_with_place(paddle.XPUPlace(0), ['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        # Gradient with respect to Y only; X is in the no-grad set.
        if not paddle.is_compiled_with_xpu():
            return
        self.check_grad_with_place(
            paddle.XPUPlace(0), ['Y'],
            'Out',
            max_relative_error=0.005,
            no_grad_set={'X'})

    def test_check_grad_ingore_y(self):
        # Gradient with respect to X only; Y is in the no-grad set.
        if not paddle.is_compiled_with_xpu():
            return
        self.check_grad_with_place(
            paddle.XPUPlace(0), ['X'],
            'Out',
            max_relative_error=0.005,
            no_grad_set={'Y'})
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
@skip_check_grad_ci(
    reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseMinOp_scalar(TestElementwiseOp):
    """``elementwise_min`` of a (10, 3, 4) tensor with a one-element Y."""

    def setUp(self):
        self.op_type = "elementwise_min"
        # np.random.random_integers is deprecated. randint excludes its
        # upper bound, so 6 keeps the original inclusive range [-5, 5].
        x = np.random.randint(-5, 6, [10, 3, 4]).astype("float32")
        # X holds whole numbers only, so it never equals 0.5 and min()
        # stays differentiable at every element.
        y = np.array([0.5]).astype("float32")
        self.inputs = {'X': x, 'Y': y}
        self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_Vector(TestElementwiseOp):
    """``elementwise_min`` of two 1-D inputs with 100 elements each."""

    def setUp(self):
        self.op_type = "elementwise_min"
        shape = (100, )
        x = np.random.random(shape).astype("float32")
        sgn = np.random.choice([-1, 1], shape).astype("float32")
        # Offset of at least 0.1 so x and y never coincide.
        gap = np.random.uniform(0.1, 1, shape).astype("float32")
        y = x + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.outputs = {'Out': np.minimum(x, y)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_broadcast_0(TestElementwiseOp):
    """Y of shape (100,) aligned with dimension 0 of X (100, 3, 2)."""

    def setUp(self):
        self.op_type = "elementwise_min"
        y_shape = (100, )
        x = np.random.uniform(0.5, 1, (100, 3, 2)).astype(np.float32)
        sgn = np.random.choice([-1, 1], y_shape).astype(np.float32)
        # X lies in [0.5, 1) and the offset is at least 1, so Y differs
        # from every element of X.
        gap = np.random.uniform(1, 2, y_shape).astype(np.float32)
        y = x[:, 0, 0] + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.attrs = {'axis': 0}
        # Reference result: expand Y to (100, 1, 1) and let NumPy broadcast.
        y_expanded = y[:, np.newaxis, np.newaxis]
        self.outputs = {'Out': np.minimum(x, y_expanded)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_broadcast_1(TestElementwiseOp):
    """Y of shape (100,) aligned with dimension 1 of X (2, 100, 3)."""

    def setUp(self):
        self.op_type = "elementwise_min"
        y_shape = (100, )
        x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(np.float32)
        sgn = np.random.choice([-1, 1], y_shape).astype(np.float32)
        # X lies in [0.5, 1) and the offset is at least 1, so Y differs
        # from every element of X.
        gap = np.random.uniform(1, 2, y_shape).astype(np.float32)
        y = x[0, :, 0] + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.attrs = {'axis': 1}
        # Reference result: expand Y to (1, 100, 1) and let NumPy broadcast.
        y_expanded = y[np.newaxis, :, np.newaxis]
        self.outputs = {'Out': np.minimum(x, y_expanded)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_broadcast_2(TestElementwiseOp):
    """Y of shape (100,) matched against the last dimension of X (2, 3, 100).

    No ``attrs`` are set by this case.
    """

    def setUp(self):
        self.op_type = "elementwise_min"
        y_shape = (100, )
        x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(np.float32)
        sgn = np.random.choice([-1, 1], y_shape).astype(np.float32)
        # X lies in [0.5, 1) and the offset is at least 1, so Y differs
        # from every element of X.
        gap = np.random.uniform(1, 2, y_shape).astype(np.float32)
        y = x[0, 0, :] + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        # Reference result: expand Y to (1, 1, 100) and let NumPy broadcast.
        y_expanded = y[np.newaxis, np.newaxis, :]
        self.outputs = {'Out': np.minimum(x, y_expanded)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_broadcast_3(TestElementwiseOp):
    """Y of shape (25, 4) aligned with dimensions 1-2 of X (2, 25, 4, 1)."""

    def setUp(self):
        self.op_type = "elementwise_min"
        y_shape = (25, 4)
        x = np.random.uniform(0.5, 1, (2, 25, 4, 1)).astype(np.float32)
        sgn = np.random.choice([-1, 1], y_shape).astype(np.float32)
        # X lies in [0.5, 1) and the offset is at least 1, so Y differs
        # from every element of X.
        gap = np.random.uniform(1, 2, y_shape).astype(np.float32)
        y = x[0, :, :, 0] + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.attrs = {'axis': 1}
        # Reference result: expand Y to (1, 25, 4, 1) and let NumPy broadcast.
        y_expanded = y[np.newaxis, :, :, np.newaxis]
        self.outputs = {'Out': np.minimum(x, y_expanded)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwiseMinOp_broadcast_4(TestElementwiseOp):
    """``elementwise_min`` on 4-D inputs derived from a (2, 10, 2, 5) X."""

    def setUp(self):
        self.op_type = "elementwise_min"
        offset_shape = (2, 10, 1, 5)
        x = np.random.uniform(0.5, 1, (2, 10, 2, 5)).astype(np.float32)
        sgn = np.random.choice([-1, 1], offset_shape).astype(np.float32)
        gap = np.random.uniform(1, 2, offset_shape).astype(np.float32)
        # NOTE(review): the offset is broadcast while being added to x, so
        # y has the full shape (2, 10, 2, 5) rather than (2, 10, 1, 5).
        # Presumably a size-1 dimension in Y was intended; confirm.
        y = x + sgn * gap
        self.inputs = {'X': x, 'Y': y}
        self.outputs = {'Out': np.minimum(x, y)}
# Run every test case defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -19,58 +19,111 @@ from op_test import OpTest, skip_check_grad_ci ...@@ -19,58 +19,111 @@ from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard from paddle.fluid import compiler, Program, program_guard
import paddle import paddle
from elementwise import TestXPUElementwiseOpBase from op_test_xpu import XPUOpTest
paddle.enable_static() paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp(OpTest, TestXPUElementwiseOpBase): class ElementwiseMulOp(XPUOpTest):
def init_kernel_type(self): def init_kernel_type(self):
self.use_mkldnn = False self.use_mkldnn = False
def setUp(self): def setUp(self):
TestXPUElementwiseOpBase.setUp(self, "elementwise_mul") self.use_xpu = True
self.op_type = "elementwise_mul"
self.dtype = np.float32
self.axis = -1
self.init_dtype()
self.init_input_output()
self.init_kernel_type() self.init_kernel_type()
self.init_axis() self.init_axis()
self.attrs['axis'] = self.axis
self.attrs['use_mkldnn'] = self.use_mkldnn
self.grad_implemented = True
self.make_input()
self.make_output()
def make_output(self, x_shape=None, y_shape=None): self.inputs = {
x, y = self.reshape_input(x_shape, y_shape) 'X': OpTest.np_dtype_to_fluid_dtype(self.x),
self.outputs = {'Out': np.multiply(x, y)} 'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
}
self.outputs = {'Out': self.out}
self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad_normal(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'],
'Out',
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.multiply(self.x, self.y)
def init_dtype(self):
pass
def init_axis(self):
pass
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp_scalar(TestXPUElementwiseMulOp): class TestElementwiseMulOp_scalar(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestXPUElementwiseMulOp_scalar, self).setUp() self.op_type = "elementwise_mul"
self.make_input((10, 3, 4), (1, )) self.inputs = {
self.make_output() 'X': np.random.rand(10, 3, 4).astype(np.float32),
self.grad_implemented = False 'Y': np.random.rand(1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp_Vector(TestXPUElementwiseMulOp): class TestElementwiseMulOp_Vector(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestXPUElementwiseMulOp_Vector, self).setUp() self.op_type = "elementwise_mul"
self.make_input((100, ), (100, )) self.inputs = {
self.make_output() 'X': np.random.random((100, )).astype("float32"),
'Y': np.random.random((100, )).astype("float32")
}
self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp_broadcast_0(TestXPUElementwiseMulOp): class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
def setUp(self): def init_input_output(self):
super(TestXPUElementwiseMulOp_broadcast_0, self).setUp() self.x = np.random.rand(100, 2, 3).astype(self.dtype)
self.make_input((100, 2, 3), (100, )) self.y = np.random.rand(100).astype(self.dtype)
self.make_output(y_shape=(100, 1, 1)) self.out = self.x * self.y.reshape(100, 1, 1)
self.y_grad_implemented = False
def init_axis(self): def init_axis(self):
self.axis = 0 self.axis = 0
...@@ -78,75 +131,140 @@ class TestXPUElementwiseMulOp_broadcast_0(TestXPUElementwiseMulOp): ...@@ -78,75 +131,140 @@ class TestXPUElementwiseMulOp_broadcast_0(TestXPUElementwiseMulOp):
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_1(TestXPUElementwiseMulOp): class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
'X': np.random.rand(2, 100, 3).astype(np.float32),
'Y': np.random.rand(100).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 100, 1)
}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestElementwiseMulOp_broadcast_1, self).setUp() self.op_type = "elementwise_mul"
self.attrs['axis'] = 1 self.inputs = {
self.y_grad_implemented = False 'X': np.random.rand(2, 3, 100).astype(np.float32),
self.make_input((2, 100, 3), (100, )) 'Y': np.random.rand(100).astype(np.float32)
self.make_output(y_shape=(1, 100, 1)) }
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 1, 100)
}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_2(TestXPUElementwiseMulOp): class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestElementwiseMulOp_broadcast_2, self).setUp() self.op_type = "elementwise_mul"
self.y_grad_implemented = False self.inputs = {
self.make_input((2, 3, 100), (100, )) 'X': np.random.rand(2, 10, 12, 3).astype(np.float32),
self.make_output(y_shape=(1, 1, 100)) 'Y': np.random.rand(10, 12).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 10, 12, 1)
}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_3(TestXPUElementwiseMulOp): class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestElementwiseMulOp_broadcast_3, self).setUp() self.op_type = "elementwise_mul"
self.attrs['axis'] = 1 self.inputs = {
self.y_grad_implemented = False 'X': np.random.rand(10, 2, 11).astype(np.float32),
self.make_input((2, 10, 12, 3), (10, 12)) 'Y': np.random.rand(10, 1, 11).astype(np.float32)
self.make_output(y_shape=(1, 10, 12, 1)) }
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_4(TestXPUElementwiseMulOp): class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestElementwiseMulOp_broadcast_4, self).setUp() self.op_type = "elementwise_mul"
self.is_common_broadcast = True self.inputs = {
self.make_input((10, 2, 11), (10, 1, 11)) 'X': np.random.rand(10, 4, 2, 3).astype(np.float32),
self.make_output() 'Y': np.random.rand(10, 4, 1, 3).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseMulOp_broadcast_5(TestXPUElementwiseMulOp): class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestElementwiseMulOp_broadcast_5, self).setUp() self.op_type = "elementwise_mul"
self.is_common_broadcast = True self.inputs = {
self.make_input((10, 4, 2, 3), (10, 4, 1, 3)) 'X': np.random.rand(2, 3, 100).astype(np.float32),
self.make_output() 'Y': np.random.rand(1, 1, 100).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp_commonuse_1(TestXPUElementwiseMulOp): class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestXPUElementwiseMulOp_commonuse_1, self).setUp() self.op_type = "elementwise_mul"
self.is_common_broadcast = True self.inputs = {
self.make_input((2, 3, 100), (1, 1, 100)) 'X': np.random.rand(30, 3, 1, 5).astype(np.float32),
self.make_output() 'Y': np.random.rand(30, 1, 4, 1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseMulOp_xsize_lessthan_ysize(TestXPUElementwiseMulOp): class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp):
def setUp(self): def setUp(self):
super(TestXPUElementwiseMulOp_xsize_lessthan_ysize, self).setUp() self.op_type = "elementwise_mul"
self.attrs['axis'] = 2 self.inputs = {
self.is_x_size_less_than_y = True 'X': np.random.rand(10, 10).astype(np.float32),
self.make_input((10, 10), (2, 2, 10, 10)) 'Y': np.random.rand(2, 2, 10, 10).astype(np.float32)
self.make_output(x_shape=(1, 1, 10, 10)) }
self.attrs = {'axis': 2}
self.outputs = {
'Out': self.inputs['X'].reshape(1, 1, 10, 10) * self.inputs['Y']
}
self.init_kernel_type()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestElementwiseMulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input of elementwise_mul must be Variable.
x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0))
y1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.XPUPlace(0))
self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1)
# the input dtype of elementwise_mul must be float32
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8")
y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8")
self.assertRaises(TypeError, fluid.layers.elementwise_mul, x2, y2)
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp(XPUOpTest):
    """Base case for the ``elementwise_pow`` operator on XPU.

    Both operands are 20x5 float32 arrays drawn from ``uniform(1, 2)``; the
    reference output is ``np.power(X, Y)``. The whole class is skipped when
    paddle is not compiled with XPU. Subclasses in this file override
    ``setUp`` only and reuse the two check methods defined here.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        # X is sampled before Y, matching the order of the dict literal.
        base = np.random.uniform(1, 2, [20, 5]).astype("float32")
        exponent = np.random.uniform(1, 2, [20, 5]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}

    def test_check_output(self):
        """Check the forward result on XPU device 0."""
        if not paddle.is_compiled_with_xpu():
            return
        self.check_output_with_place(paddle.XPUPlace(0))

    def test_check_grad_normal(self):
        """Check the gradients of 'Out' w.r.t. both 'X' and 'Y' on XPU device 0."""
        if not paddle.is_compiled_with_xpu():
            return
        self.check_grad_with_place(paddle.XPUPlace(0), ['X', 'Y'], 'Out')
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_big_shape_1(TestElementwisePowOp):
    """Same-shape case: 10x10 bases from ``uniform(1, 2)`` raised to 10x10
    exponents from ``uniform(0.1, 1)``.

    Only ``setUp`` is overridden; the check methods are inherited.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(1, 2, [10, 10]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [10, 10]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_big_shape_2(TestElementwisePowOp):
    """Same-shape case: 10x10 bases from ``uniform(1, 2)`` raised to 10x10
    exponents from ``uniform(0.2, 2)``.

    Only ``setUp`` is overridden; the check methods are inherited.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(1, 2, [10, 10]).astype("float32")
        exponent = np.random.uniform(0.2, 2, [10, 10]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
@skip_check_grad_ci(
    reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwisePowOp_scalar(TestElementwisePowOp):
    """Broadcast of a one-element exponent over a [3, 4, 4]-rank-3 style input.

    X has shape [3, 3, 4] and Y has shape [1], both float32 from
    ``uniform(0.1, 1)``; the reference is ``np.power(X, Y)``. The class is
    additionally wrapped in ``skip_check_grad_ci`` with the reason string
    given in the decorator.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [3, 3, 4]).astype(np.float32)
        exponent = np.random.uniform(0.1, 1, [1]).astype(np.float32)
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_tensor(TestElementwisePowOp):
    """1-D case: 100 bases from ``uniform(0.1, 1)`` raised to 100 exponents
    from ``uniform(1, 3)``.

    Only ``setUp`` is overridden; the check methods are inherited.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [100]).astype("float32")
        exponent = np.random.uniform(1, 3, [100]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_broadcast_0(TestElementwisePowOp):
    """Broadcast of a [100] exponent against a [2, 1, 100] base.

    No ``axis`` attribute is set; the reference relies on numpy's own
    broadcasting in ``np.power(X, Y)``.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [2, 1, 100]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [100]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_broadcast_1(TestElementwisePowOp):
    """Broadcast of a [100] exponent against a [2, 100, 1] base with axis=1.

    The numpy reference reshapes Y to (100, 1) so that it lines up with the
    middle dimension of X.
    """

    def setUp(self):
        """Declare the op type, inputs, the axis attribute and the reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [2, 100, 1]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [100]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.attrs = {'axis': 1}
        # Reshape Y for the reference so numpy broadcasts it along dim 1 of X.
        aligned_exponent = exponent.reshape(100, 1)
        self.outputs = {'Out': np.power(base, aligned_exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_broadcast_2(TestElementwisePowOp):
    """Broadcast of a [100] exponent against a [100, 3, 1] base with axis=0.

    The numpy reference reshapes Y to (100, 1, 1) so that it lines up with
    the leading dimension of X.
    """

    def setUp(self):
        """Declare the op type, inputs, the axis attribute and the reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [100, 3, 1]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [100]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.attrs = {'axis': 0}
        # Reshape Y for the reference so numpy broadcasts it along dim 0 of X.
        aligned_exponent = exponent.reshape(100, 1, 1)
        self.outputs = {'Out': np.power(base, aligned_exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_broadcast_3(TestElementwisePowOp):
    """Broadcast of a [20, 5] exponent against a [2, 20, 5, 1] base with axis=1.

    The numpy reference reshapes Y to (1, 20, 5, 1) so that it lines up with
    dimensions 1 and 2 of X.
    """

    def setUp(self):
        """Declare the op type, inputs, the axis attribute and the reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [2, 20, 5, 1]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [20, 5]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.attrs = {'axis': 1}
        # Reshape Y for the reference so numpy broadcasts it over dims 1-2 of X.
        aligned_exponent = exponent.reshape(1, 20, 5, 1)
        self.outputs = {'Out': np.power(base, aligned_exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOp_broadcast_4(TestElementwisePowOp):
    """Same-rank broadcast: a [2, 10, 1, 5] exponent against a [2, 10, 3, 5] base.

    No ``axis`` attribute is set; the size-1 dimension of Y is expanded by
    numpy's broadcasting in the reference ``np.power(X, Y)``.
    """

    def setUp(self):
        """Declare the op type, the random inputs and the numpy reference."""
        self.op_type = "elementwise_pow"
        base = np.random.uniform(0.1, 1, [2, 10, 3, 5]).astype("float32")
        exponent = np.random.uniform(0.1, 1, [2, 10, 1, 5]).astype("float32")
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestElementwisePowOpInt(OpTest):
    """Integer-input case for ``elementwise_pow``: [1, 3, 6] ** [1, 1, 1].

    Derives from ``OpTest`` directly and only checks the forward output.
    """

    def setUp(self):
        """Declare the op type, the fixed integer inputs and the reference."""
        self.op_type = "elementwise_pow"
        base = np.asarray([1, 3, 6])
        exponent = np.asarray([1, 1, 1])
        self.inputs = {'X': base, 'Y': exponent}
        self.outputs = {'Out': np.power(base, exponent)}

    def test_check_output(self):
        """Check the forward result via the default ``check_output``."""
        # NOTE(review): no explicit place is passed here, whereas the float
        # tests in this file use check_output_with_place(paddle.XPUPlace(0)).
        # Confirm whether this case is meant to run on XPU.
        self.check_output()
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -11,117 +11,198 @@ ...@@ -11,117 +11,198 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import unittest
import numpy as np import numpy as np
import sys import sys
sys.path.append("..") sys.path.append("..")
from op_test import OpTest, skip_check_grad_ci
import paddle import paddle
from elementwise import TestXPUElementwiseOpBase from op_test import OpTest, skip_check_grad_ci
from op_test_xpu import XPUOpTest
import unittest
paddle.enable_static() paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestXPUElementwiseSubOp(OpTest, TestXPUElementwiseOpBase): class TestElementwiseOp(OpTest):
def setUp(self): def setUp(self):
TestXPUElementwiseOpBase.setUp(self, "elementwise_sub") self.use_xpu = True
self.make_input() self.op_type = "elementwise_sub"
self.make_output() self.inputs = {
self.grad_implemented = True 'X': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-3)
def test_check_grad_normal(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
max_relative_error=0.005,
no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
max_relative_error=0.005,
no_grad_set=set('Y'))
def make_output(self, x_shape=None, y_shape=None):
x, y = self.reshape_input(x_shape, y_shape) @unittest.skipIf(not paddle.is_compiled_with_xpu(),
self.outputs = {'Out': x - y} "core is not compiled with XPU")
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseSubOp_scalar(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.rand(10, 3, 4).astype(np.float32),
'Y': np.random.rand(1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_scalar(TestXPUElementwiseSubOp): class TestElementwiseSubOp_Vector(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_scalar, self).setUp() self.op_type = "elementwise_sub"
self.grad_implemented = False self.inputs = {
self.make_input((10, 3, 4), (1, )) 'X': np.random.random((100, )).astype("float32"),
self.make_output() 'Y': np.random.random((100, )).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_Vector(TestXPUElementwiseSubOp): class TestElementwiseSubOp_broadcast_0(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_Vector, self).setUp() self.op_type = "elementwise_sub"
self.make_input((100, ), (100, )) self.inputs = {
self.make_output() 'X': np.random.rand(100, 3, 2).astype(np.float32),
'Y': np.random.rand(100).astype(np.float32)
}
self.attrs = {'axis': 0}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(100, 1, 1)
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_broadcast_0(TestXPUElementwiseSubOp): class TestElementwiseSubOp_broadcast_1(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_broadcast_0, self).setUp() self.op_type = "elementwise_sub"
self.attrs['axis'] = 0 self.inputs = {
self.make_input((100, 3, 2), (100, )) 'X': np.random.rand(2, 100, 3).astype(np.float32),
self.make_output(y_shape=(100, 1, 1)) 'Y': np.random.rand(100).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 100, 1)
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_broadcast_1(TestXPUElementwiseSubOp): class TestElementwiseSubOp_broadcast_2(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_broadcast_1, self).setUp() self.op_type = "elementwise_sub"
self.attrs['axis'] = 1 self.inputs = {
self.make_input((2, 100, 3), (100, )) 'X': np.random.rand(2, 3, 100).astype(np.float32),
self.make_output(y_shape=(1, 100, 1)) 'Y': np.random.rand(100).astype(np.float32)
}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 1, 100)
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_broadcast_2(TestXPUElementwiseSubOp): class TestElementwiseSubOp_broadcast_3(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_broadcast_2, self).setUp() self.op_type = "elementwise_sub"
self.make_input((2, 3, 100), (100, )) self.inputs = {
self.make_output(y_shape=(1, 1, 100)) 'X': np.random.rand(2, 10, 12, 3).astype(np.float32),
'Y': np.random.rand(10, 12).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 10, 12, 1)
}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_broadcast_3(TestXPUElementwiseSubOp): class TestElementwiseSubOp_broadcast_4(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_broadcast_3, self).setUp() self.op_type = "elementwise_sub"
self.attrs['axis'] = 1 self.inputs = {
self.make_input((2, 10, 12, 3), (10, 12)) 'X': np.random.rand(2, 5, 3, 12).astype(np.float32),
self.make_output(y_shape=(1, 10, 12, 1)) 'Y': np.random.rand(2, 5, 1, 12).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_broadcast_4(TestXPUElementwiseSubOp): class TestElementwiseSubOp_commonuse_1(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_broadcast_4, self).setUp() self.op_type = "elementwise_sub"
self.is_common_broadcast = True self.inputs = {
self.make_input((2, 5, 3, 12), (2, 5, 1, 12)) 'X': np.random.rand(2, 3, 100).astype(np.float32),
self.make_output() 'Y': np.random.rand(1, 1, 100).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_commonuse_1(TestXPUElementwiseSubOp): class TestElementwiseSubOp_commonuse_2(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_commonuse_1, self).setUp() self.op_type = "elementwise_sub"
self.is_common_broadcast = True self.inputs = {
self.make_input((2, 3, 100), (1, 1, 100)) 'X': np.random.rand(10, 3, 1, 4).astype(np.float32),
self.make_output() 'Y': np.random.rand(10, 1, 12, 1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
@unittest.skipIf(not paddle.is_compiled_with_xpu(), @unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU") "core is not compiled with XPU")
class TestElementwiseSubOp_xsize_lessthan_ysize(TestXPUElementwiseSubOp): class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp):
def setUp(self): def setUp(self):
super(TestElementwiseSubOp_xsize_lessthan_ysize, self).setUp() self.op_type = "elementwise_sub"
self.attrs['axis'] = 2 self.inputs = {
self.is_x_size_less_than_y = True 'X': np.random.rand(10, 12).astype(np.float32),
self.make_input((10, 12), (2, 3, 10, 12)) 'Y': np.random.rand(2, 3, 10, 12).astype(np.float32)
self.make_output(x_shape=(1, 1, 10, 12)) }
self.attrs = {'axis': 2}
self.outputs = {
'Out': self.inputs['X'].reshape(1, 1, 10, 12) - self.inputs['Y']
}
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -13,16 +13,15 @@ ...@@ -13,16 +13,15 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from test_softmax_op import stable_softmax
from op_test import OpTest
import paddle.fluid.core as core
import paddle
import unittest import unittest
import numpy as np import numpy as np
import sys import sys
sys.path.append("..") sys.path.append("..")
import paddle
import paddle.fluid.core as core
from op_test import OpTest
from test_softmax_op import stable_softmax
def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1): def cross_entropy(softmax, label, soft_label, axis, ignore_index=-1):
...@@ -54,10 +53,11 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): ...@@ -54,10 +53,11 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
self.op_type = "softmax_with_cross_entropy" self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = False self.numeric_stable_mode = False
self.soft_label = False self.soft_label = False
self.dtype = np.float64 self.dtype = np.float32
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.shape = [41, 37] self.shape = [41, 37]
self.use_xpu = True
def setUp(self): def setUp(self):
self.initParams() self.initParams()
...@@ -103,7 +103,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): ...@@ -103,7 +103,7 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
paddle.enable_static() paddle.enable_static()
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place( self.check_grad_with_place(
place, ["Logits"], "Loss", max_relative_error=0.1) place, ["Logits"], "Loss", max_relative_error=0.2)
class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp):
...@@ -115,6 +115,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): ...@@ -115,6 +115,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp):
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.dtype = np.float32 self.dtype = np.float32
self.use_xpu = True
def test_check_output(self): def test_check_output(self):
if paddle.is_compiled_with_xpu(): if paddle.is_compiled_with_xpu():
...@@ -127,7 +128,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp): ...@@ -127,7 +128,7 @@ class TestXPUSoftmaxWithCrossEntropyOp(TestSoftmaxWithCrossEntropyOp):
paddle.enable_static() paddle.enable_static()
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place( self.check_grad_with_place(
place, ["Logits"], "Loss", max_relative_error=0.1) place, ["Logits"], "Loss", max_relative_error=0.2)
class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp):
...@@ -139,10 +140,11 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): ...@@ -139,10 +140,11 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp):
self.op_type = "softmax_with_cross_entropy" self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True self.numeric_stable_mode = True
self.soft_label = True self.soft_label = True
self.dtype = np.float64 self.dtype = np.float32
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.shape = [41, 37] self.shape = [41, 37]
self.use_xpu = True
def test_check_output(self): def test_check_output(self):
if paddle.is_compiled_with_xpu(): if paddle.is_compiled_with_xpu():
...@@ -155,7 +157,7 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp): ...@@ -155,7 +157,7 @@ class TestXPUSoftmaxWithCrossEntropyOp2(TestXPUSoftmaxWithCrossEntropyOp):
paddle.enable_static() paddle.enable_static()
place = paddle.XPUPlace(0) place = paddle.XPUPlace(0)
self.check_grad_with_place( self.check_grad_with_place(
place, ["Logits"], "Loss", max_relative_error=0.1) place, ["Logits"], "Loss", max_relative_error=0.2)
class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp): class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp):
...@@ -170,55 +172,56 @@ class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp): ...@@ -170,55 +172,56 @@ class TestXPUSoftmaxWithCrossEntropyOp3(TestXPUSoftmaxWithCrossEntropyOp):
self.shape = [41, 37] self.shape = [41, 37]
self.ignore_index = 5 self.ignore_index = 5
self.axis = -1 self.axis = -1
self.dtype = np.float64 self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpAxis1(TestXPUSoftmaxWithCrossEntropyOp):
"""
Test softmax with cross entropy operator with discreate one-hot labels.
Given axis != -1
"""
def initParams(self):
self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True
self.soft_label = False
self.dtype = np.float64
self.axis = 0
self.ignore_index = -1
self.shape = [3, 5, 7, 11]
class TestXPUSoftmaxWithCrossEntropyOpAxis2(TestXPUSoftmaxWithCrossEntropyOp):
"""
Test softmax with cross entropy operator with discreate one-hot labels.
Given axis != -1
"""
def initParams(self):
self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True
self.soft_label = False
self.dtype = np.float64
self.axis = 1
self.ignore_index = -1
self.shape = [3, 5, 7, 11]
class TestXPUSoftmaxWithCrossEntropyOpAxis3(TestXPUSoftmaxWithCrossEntropyOp):
"""
Test softmax with cross entropy operator with discreate one-hot labels.
Given axis != -1
"""
def initParams(self): # xpu only support axis = rank -1
self.op_type = "softmax_with_cross_entropy" # class TestXPUSoftmaxWithCrossEntropyOpAxis1(TestXPUSoftmaxWithCrossEntropyOp):
self.numeric_stable_mode = True # """
self.soft_label = False # Test softmax with cross entropy operator with discreate one-hot labels.
self.dtype = np.float64 # Given axis != -1
self.axis = 2 # """
self.ignore_index = -1
self.shape = [3, 5, 7, 11] # def initParams(self):
# self.op_type = "softmax_with_cross_entropy"
# self.numeric_stable_mode = True
# self.soft_label = False
# self.dtype = np.float32
# self.axis = 0
# self.ignore_index = -1
# self.shape = [3, 5, 7, 11]
# xpu only support axis = rank -1
# class TestXPUSoftmaxWithCrossEntropyOpAxis2(TestXPUSoftmaxWithCrossEntropyOp):
# """
# Test softmax with cross entropy operator with discreate one-hot labels.
# Given axis != -1
# """
# def initParams(self):
# self.op_type = "softmax_with_cross_entropy"
# self.numeric_stable_mode = True
# self.soft_label = False
# self.dtype = np.float32
# self.axis = 1
# self.ignore_index = -1
# self.shape = [3, 5, 7, 11]
# xpu only support axis = rank -1
# class TestXPUSoftmaxWithCrossEntropyOpAxis3(TestXPUSoftmaxWithCrossEntropyOp):
# """
# Test softmax with cross entropy operator with discreate one-hot labels.
# Given axis != -1
# """
# def initParams(self):
# self.op_type = "softmax_with_cross_entropy"
# self.numeric_stable_mode = True
# self.soft_label = False
# self.dtype = np.float32
# self.axis = 2
# self.ignore_index = -1
# self.shape = [3, 5, 7, 11]
class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp): class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp):
...@@ -231,7 +234,7 @@ class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp): ...@@ -231,7 +234,7 @@ class TestXPUSoftmaxWithCrossEntropyOpAxis4(TestXPUSoftmaxWithCrossEntropyOp):
self.op_type = "softmax_with_cross_entropy" self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True self.numeric_stable_mode = True
self.soft_label = False self.soft_label = False
self.dtype = np.float64 self.dtype = np.float32
self.axis = 3 self.axis = 3
self.ignore_index = -1 self.ignore_index = -1
self.shape = [3, 5, 7, 11] self.shape = [3, 5, 7, 11]
...@@ -248,46 +251,47 @@ class TestXPUSoftmaxWithCrossEntropyOpAxisDimEqualOne( ...@@ -248,46 +251,47 @@ class TestXPUSoftmaxWithCrossEntropyOpAxisDimEqualOne(
self.op_type = "softmax_with_cross_entropy" self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True self.numeric_stable_mode = True
self.soft_label = False self.soft_label = False
self.dtype = np.float64 self.dtype = np.float32
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.shape = [3, 5, 7, 1] self.shape = [3, 5, 7, 1]
class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis1( # xpu only support axis = rank -1
TestXPUSoftmaxWithCrossEntropyOp): # class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis1(
def initParams(self): # TestXPUSoftmaxWithCrossEntropyOp):
self.op_type = "softmax_with_cross_entropy" # def initParams(self):
self.numeric_stable_mode = True # self.op_type = "softmax_with_cross_entropy"
self.soft_label = True # self.numeric_stable_mode = True
self.shape = [3, 5, 7, 11] # self.soft_label = True
self.axis = 0 # self.shape = [3, 5, 7, 11]
self.ignore_index = -1 # self.axis = 0
self.dtype = np.float64 # self.ignore_index = -1
# self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis2( # xpu only support axis = rank -1
TestXPUSoftmaxWithCrossEntropyOp2): # class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis2(
def initParams(self): # TestXPUSoftmaxWithCrossEntropyOp2):
self.op_type = "softmax_with_cross_entropy" # def initParams(self):
self.numeric_stable_mode = True # self.op_type = "softmax_with_cross_entropy"
self.soft_label = True # self.numeric_stable_mode = True
self.shape = [3, 5, 7, 11] # self.soft_label = True
self.axis = 1 # self.shape = [3, 5, 7, 11]
self.ignore_index = -1 # self.axis = 1
self.dtype = np.float64 # self.ignore_index = -1
# self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis3( # xpu only support axis = rank -1
TestXPUSoftmaxWithCrossEntropyOp2): # class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis3(
def initParams(self): # TestXPUSoftmaxWithCrossEntropyOp2):
self.op_type = "softmax_with_cross_entropy" # def initParams(self):
self.numeric_stable_mode = True # self.op_type = "softmax_with_cross_entropy"
self.soft_label = True # self.numeric_stable_mode = True
self.shape = [3, 5, 7, 11] # self.soft_label = True
self.axis = 2 # self.shape = [3, 5, 7, 11]
self.ignore_index = -1 # self.axis = 2
self.dtype = np.float64 # self.ignore_index = -1
# self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4( class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4(
...@@ -299,43 +303,44 @@ class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4( ...@@ -299,43 +303,44 @@ class TestXPUSoftmaxWithCrossEntropyOpSoftLabelAxis4(
self.shape = [3, 5, 7, 11] self.shape = [3, 5, 7, 11]
self.axis = 3 self.axis = 3
self.ignore_index = -1 self.ignore_index = -1
self.dtype = np.float64 self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1(
TestXPUSoftmaxWithCrossEntropyOp3):
def initParams(self):
self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True
self.soft_label = False
self.shape = [3, 5, 7, 11]
self.ignore_index = 1
self.axis = 0
self.dtype = np.float64
class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2(
TestXPUSoftmaxWithCrossEntropyOp3):
def initParams(self):
self.op_type = "softmax_with_cross_entropy"
self.numeric_stable_mode = True
self.soft_label = False
self.shape = [3, 5, 7, 11]
self.ignore_index = 0
self.axis = 1
self.dtype = np.float64
class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3( # xpu only support axis = rank -1
TestXPUSoftmaxWithCrossEntropyOp3): # class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis1(
def initParams(self): # TestXPUSoftmaxWithCrossEntropyOp3):
self.op_type = "softmax_with_cross_entropy" # def initParams(self):
self.numeric_stable_mode = True # self.op_type = "softmax_with_cross_entropy"
self.soft_label = False # self.numeric_stable_mode = True
self.shape = [3, 5, 7, 11] # self.soft_label = False
self.ignore_index = 3 # self.shape = [3, 5, 7, 11]
self.axis = 2 # self.ignore_index = 1
self.dtype = np.float64 # self.axis = 0
# self.dtype = np.float32
# xpu only support axis = rank -1
# class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis2(
# TestXPUSoftmaxWithCrossEntropyOp3):
# def initParams(self):
# self.op_type = "softmax_with_cross_entropy"
# self.numeric_stable_mode = True
# self.soft_label = False
# self.shape = [3, 5, 7, 11]
# self.ignore_index = 0
# self.axis = 1
# self.dtype = np.float32
# xpu only support axis = rank -1
# class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis3(
# TestXPUSoftmaxWithCrossEntropyOp3):
# def initParams(self):
# self.op_type = "softmax_with_cross_entropy"
# self.numeric_stable_mode = True
# self.soft_label = False
# self.shape = [3, 5, 7, 11]
# self.ignore_index = 3
# self.axis = 2
# self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4( class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4(
...@@ -347,7 +352,7 @@ class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4( ...@@ -347,7 +352,7 @@ class TestXPUSoftmaxWithCrossEntropyOpIgnoreIndexNoCudnnAxis4(
self.shape = [3, 5, 7, 11] self.shape = [3, 5, 7, 11]
self.ignore_index = 3 self.ignore_index = 3
self.axis = 3 self.axis = 3
self.dtype = np.float64 self.dtype = np.float32
class TestXPUSoftmaxWithCrossEntropyOpBoundary0( class TestXPUSoftmaxWithCrossEntropyOpBoundary0(
...@@ -364,7 +369,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary0( ...@@ -364,7 +369,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary0(
self.shape = [3, 5, 7, 11] self.shape = [3, 5, 7, 11]
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.dtype = np.float64 self.dtype = np.float32
self.logits = np.full(self.shape, -500.0).astype(self.dtype) self.logits = np.full(self.shape, -500.0).astype(self.dtype)
...@@ -382,7 +387,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary1( ...@@ -382,7 +387,7 @@ class TestXPUSoftmaxWithCrossEntropyOpBoundary1(
self.shape = [3, 5, 7, 11] self.shape = [3, 5, 7, 11]
self.axis = -1 self.axis = -1
self.ignore_index = -1 self.ignore_index = -1
self.dtype = np.float64 self.dtype = np.float32
self.logits = np.full(self.shape, 1000.0).astype(self.dtype) self.logits = np.full(self.shape, 1000.0).astype(self.dtype)
self.logits[:, :, 0, :] = -1000.0 self.logits[:, :, 0, :] = -1000.0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册