Unverified commit 6b727e08, authored by: Q QingshuChen, committed by: GitHub

support elementwise add, activation, matmul on Baidu Kunlun (#27143)

* support elementwise add, activation, matmul on Baidu Kunlun
* test=kunlun

* minor
* test=kunlun

* reconstruct the xpu directory
* test=kunlun

* minor
* test=kunlun

* minor
* test=kunlun

* minor
* test=kunlun

* minor
* test=kunlun

* minor
* test=kunlun
Parent: d37b3774
@@ -4,7 +4,7 @@ endif()
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-SET(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
...
@@ -62,9 +62,9 @@ function(op_library TARGET)
     endif()
   endif()
   if(WITH_XPU)
-    string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}")
-    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc)
-      list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc)
+    string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
+      list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
     endif()
   endif()
 else()
@@ -83,7 +83,7 @@ function(op_library TARGET)
     list(APPEND mkldnn_cc_srcs ${src})
   elseif(${src} MATCHES ".*\\.cu.cc$")
     list(APPEND cu_cc_srcs ${src})
-  elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$")
+  elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
     list(APPEND xpu_cc_srcs ${src})
   elseif(${src} MATCHES ".*\\.cc$")
     list(APPEND cc_srcs ${src})
...
@@ -127,11 +127,10 @@ void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
                         "Baidu Kunlun Card is properly installed.",
                         ret));
   ret = xpu_malloc(reinterpret_cast<void **>(&p), size);
-  PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
-                    platform::errors::External(
-                        "XPU API return wrong value[%d], please check whether "
-                        "Baidu Kunlun Card is properly installed.",
-                        ret));
+  PADDLE_ENFORCE_EQ(
+      ret, XPU_SUCCESS,
+      platform::errors::External(
+          "XPU API return wrong value[%d], no enough memory", ret));
   if (FLAGS_init_allocated_mem) {
     PADDLE_THROW(platform::errors::Unimplemented(
         "xpu memory FLAGS_init_allocated_mem is not implemented."));
...
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
template <typename Functor>
class XPUActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename Functor>
class XPUActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename DeviceContext, typename T>
void xpu_activation_forward(const framework::ExecutionContext &ctx,
xpu::Activation_t type) {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
int r = 0;
if (xpu::Activation_t::ACT_POW == type.type) {
type.pow_factor = ctx.Attr<float>("factor");
}
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
r = xpu::activation_forward(xpu_context, type, x->numel(),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<float *>(y_data));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
template <typename DeviceContext, typename T>
void xpu_activation_backward(const framework::ExecutionContext &ctx,
xpu::Activation_t type) {
/* TODO: relu tanh sigmoid are inplace */
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Input<Tensor>("Out");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = nullptr;
const T *y_data = nullptr;
const T *y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (y != nullptr) y_data = y->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
int r = xpu::activation_backward(xpu_context, type, dX->numel(),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(y_data),
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<float *>(x_grad));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
template <typename T, xpu::Activation_t::act_enum algorithm>
struct XPUActivationFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(ctx,
algorithm);
}
};
template <typename T, xpu::Activation_t::act_enum algorithm>
struct XPUActivationGradFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(ctx,
algorithm);
}
};
template <typename T>
using XPUReluFunctor = XPUActivationFunc<T, xpu::Activation_t::RELU>;
template <typename T>
using XPUSigmoidFunctor = XPUActivationFunc<T, xpu::Activation_t::SIGMOID>;
template <typename T>
using XPUTanhFunctor = XPUActivationFunc<T, xpu::Activation_t::TANH>;
template <typename T>
using XPUGeluFunctor = XPUActivationFunc<T, xpu::Activation_t::GELU>;
template <typename T>
using XPULogFunctor = XPUActivationFunc<T, xpu::Activation_t::LOG>;
template <typename T>
using XPUSquareFunctor = XPUActivationFunc<T, xpu::Activation_t::SQUARE>;
template <typename T>
using XPUSquareGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQUARE>;
template <typename T>
using XPUReluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::RELU>;
template <typename T>
using XPUSigmoidGradFunctor =
XPUActivationGradFunc<T, xpu::Activation_t::SIGMOID>;
template <typename T>
using XPUTanhGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::TANH>;
template <typename T>
using XPUGeluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::GELU>;
template <typename T>
using XPUSqrtFunctor = XPUActivationFunc<T, xpu::Activation_t::SQRT>;
template <typename T>
using XPUSqrtGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQRT>;
template <typename T>
using XPUACTPowFunctor = XPUActivationFunc<T, xpu::Activation_t::ACT_POW>;
template <typename T>
using XPUABSFunctor = XPUActivationFunc<T, xpu::Activation_t::ABS>;
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_XPU_KERNEL(act_type, \
ops::XPUActivationKernel<ops::functor<float>>); \
REGISTER_OP_XPU_KERNEL( \
act_type##_grad, \
ops::XPUActivationGradKernel<ops::grad_functor<float>>);
REGISTER_ACTIVATION_XPU_KERNEL(relu, XPUReluFunctor, XPUReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(tanh, XPUTanhFunctor, XPUTanhGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_OP_XPU_KERNEL(log,
ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
REGISTER_OP_XPU_KERNEL(pow,
ops::XPUActivationKernel<ops::XPUACTPowFunctor<float>>);
REGISTER_OP_XPU_KERNEL(abs,
ops::XPUActivationKernel<ops::XPUABSFunctor<float>>);
#endif // PADDLE_WITH_XPU
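For orientation, here is a minimal usage sketch of the kernels registered above, in the style of the XPU tests later in this diff. It is illustrative only: it assumes a build with WITH_XPU=ON and a reachable Kunlun card, uses paddle.is_compiled_with_xpu and paddle.XPUPlace (both added by this PR), and assumes dygraph execution on an XPUPlace dispatches relu to the XPU kernel.

import numpy as np
import paddle
import paddle.fluid as fluid

if paddle.is_compiled_with_xpu():
    place = paddle.XPUPlace(0)  # first Kunlun device
    with fluid.dygraph.guard(place):
        x_np = np.random.uniform(-1, 1, [11, 17]).astype('float32')
        x = fluid.dygraph.to_variable(x_np)
        y = fluid.layers.relu(x)  # expected to hit the XPU relu kernel above
        np.testing.assert_allclose(y.numpy(), np.maximum(x_np, 0), atol=1e-3)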
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ElementwiseAddXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
XPUElementwise<T, XPUAddFunctor<T>>(ctx);
}
};
template <typename DeviceContext, typename T>
class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
ElemwiseGradKernel<T>::Compute(ctx);
using Tensor = framework::Tensor;
auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
auto dx_dims = dout->dims();
auto dy_dims_untrimed = dout->dims();
T *dx_data = NULL;
T *dy_data = NULL;
int axis = ctx.Attr<int>("axis");
PADDLE_ENFORCE_GE(dx_dims.size(), dy_dims_untrimed.size(),
"Rank of first input must >= rank of second input.");
if (dx != nullptr) {
dx->mutable_data<T>(ctx.GetPlace());
dx_dims = dx->dims();
dx_data = dx->data<T>();
}
if (dy != nullptr) {
dy->mutable_data<T>(ctx.GetPlace());
dy_dims_untrimed = dy->dims();
dy_data = dy->data<T>();
}
int pre, n, post, is_common_broadcast;
if (dx_dims == dy_dims_untrimed) {
pre = post = 1;
n = dout->numel();
} else {
axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < dx_dims.size(),
"Axis should be in range [0, dx_dims)");
auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed);
axis = (dy_dims.size() == 0) ? dx_dims.size() : axis;
get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post,
&is_common_broadcast);
}
int len = pre * n * post;
auto &dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) {
int r = xpu::matrix_vector_add_grad(
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
return;
}
if (dx == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
}
if (dy == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
} else {
if (len != n) {
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
len * sizeof(float)),
XPU_SUCCESS, platform::errors::External(
"XPU has no enough memory"));
}
}
int r = xpu::elementwise_add_grad(
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
if ((dy != nullptr) && (len != n)) {
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
dev_ctx.Wait();
xpu_free(dy_data);
}
if ((dx == nullptr || dy == nullptr) && !(dy != nullptr && len != n)) {
dev_ctx.Wait();
}
if (dx == nullptr) {
xpu_free(dx_data);
}
if (dy == nullptr) {
xpu_free(dy_data);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
elementwise_add,
ops::ElementwiseAddXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(elementwise_add_grad,
ops::ElementwiseAddGradXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif
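As a plain-numpy illustration of what the gradient kernel above computes in the broadcast case (illustrative shapes only, not Paddle API): for z = x + y with y broadcast over a (pre, n, post) layout, dx is simply dout, while dy is dout reduced over the pre and post axes, which is what the reduce_ew call with (pre, n, post) performs.

import numpy as np

pre, n, post = 2, 3, 4                      # dout viewed as [pre, n, post]
dout = np.random.rand(pre, n, post).astype(np.float32)

dx = dout                                   # gradient w.r.t. x passes through
dy = dout.sum(axis=(0, 2))                  # reduce over pre and post -> [n]
assert dy.shape == (n,)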
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace operators {
template <typename T>
struct XPUAddFunctor {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) {
return xpu::elementwise_add(ctx, x, y, z, len);
}
};
template <typename T>
struct XPUMulFunctor {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) {
return xpu::elementwise_mul(ctx, x, y, z, len);
}
};
template <typename T, typename Functor>
void XPUElementwise(const framework::ExecutionContext& ctx) {
PADDLE_ENFORCE(platform::is_xpu_place(ctx.GetPlace()),
"This kernel only runs on XPU device.");
auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE_NE(x_var, nullptr,
platform::errors::Fatal("Cannot get input Variable X"));
PADDLE_ENFORCE(x_var->IsType<framework::LoDTensor>(),
"XPU only support LoDTensor");
auto x = x_var->Get<framework::LoDTensor>();
auto* y = ctx.Input<framework::LoDTensor>("Y");
auto* z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
auto x_dims = x.dims();
auto y_dims_untrimed = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims_untrimed.size(),
"Rank of first input must >= rank of second input.");
axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post, is_common_broadcast;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post, &is_common_broadcast);
int len = pre * n * post;
const T* x_data = x.data<T>();
const T* y_data = y->data<T>();
T* z_data = z->data<T>();
T* y_broadcast = nullptr;
auto& dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) {
if (std::is_same<Functor, XPUAddFunctor<T>>::value) {
int res = xpu::matrix_vector_add(dev_ctx.x_context(), x_data, y_data,
z_data, pre, n);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
return;
}
if (std::is_same<Functor, XPUMulFunctor<T>>::value) {
int res = xpu::matrix_vector_mul(dev_ctx.x_context(), x_data, y_data,
z_data, pre, n);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
return;
}
}
if (pre != 1 || post != 1) {
PADDLE_ENFORCE(xpu_malloc(reinterpret_cast<void**>(&y_broadcast),
len * sizeof(T)) == XPU_SUCCESS);
int res = xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre,
n, post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
y_data = y_broadcast;
}
Functor functor;
int res = functor(dev_ctx.x_context(), x_data, y_data, z_data, len);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
if (pre != 1 || post != 1) {
dev_ctx.Wait();
xpu_free(y_broadcast);
}
}
} // namespace operators
} // namespace paddle
#endif
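XPUElementwise above reduces every supported broadcast to a (pre, n, post) triple: y holds n elements that are replicated pre times on the left and post times on the right, after which a flat elementwise call over len = pre * n * post elements finishes the job. A numpy sketch of that decomposition (hypothetical shapes):

import numpy as np

# x: [2, 3, 4], y: [3] broadcast along axis=1  ->  pre=2, n=3, post=4
pre, n, post = 2, 3, 4
x = np.random.rand(pre, n, post).astype(np.float32)
y = np.random.rand(n).astype(np.float32)

# broadcast_ew(..., ASSIGN): tile y into a buffer of len = pre * n * post
y_broadcast = np.broadcast_to(y[None, :, None], (pre, n, post)).ravel()

# the flat elementwise add over len elements
z = x.ravel() + y_broadcast
np.testing.assert_allclose(z.reshape(pre, n, post), x + y[None, :, None])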
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
namespace paddle {
namespace operators {
static framework::DDim RowMatrixFromVector(const framework::DDim &x_dim) {
if (x_dim.size() > 1) {
return x_dim;
}
return framework::make_ddim({1, x_dim[0]});
}
static framework::Tensor FoldInitDims(const framework::Tensor &input) {
auto output = input;
auto in_dims = input.dims();
if (in_dims.size() == 3) {
output.Resize({in_dims[0] * in_dims[1], in_dims[2]});
}
return output;
}
/**
* Get column matrix shape from a vector shape. If the rank of y_dim > 1, the
* original y_dim is returned.
*/
static framework::DDim ColumnMatrixFromVector(const framework::DDim &y_dim) {
if (y_dim.size() > 1) {
return y_dim;
}
return framework::make_ddim({y_dim[0], 1});
}
static void ReshapeTensorIntoMatrixSequence(
framework::Tensor *x, const math::MatDescriptor &descriptor) {
int64_t h, w;
h = descriptor.height_;
w = descriptor.width_;
if (descriptor.trans_) {
std::swap(w, h);
}
if (descriptor.batch_size_) {
x->Resize({descriptor.batch_size_, h, w});
} else {
x->Resize({h, w});
}
}
/**
* Reshape the x,y,out tensor to 3-D or 2-D tensor by matrix descriptor
* Out = matmul(x, y)
*
* This method will first calculate X,Y matrix sequence, and then calculate
* the out shape.
*
* Assume X = [BatchSize, H1, W1], Y = [BatchSize, H2, W2]
* The out = [BatchSize, H1, W2]
*
* If there is no batch size in `X` and `Y`, the out will be [H1, W2]
* If any of `X` and `Y` has batch size BatchSize, the out will have the
* BatchSize.
*/
static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x,
framework::Tensor *y,
framework::Tensor *out, bool trans_x,
bool trans_y) {
auto x_dim = RowMatrixFromVector(x->dims());
auto y_dim = ColumnMatrixFromVector(y->dims());
auto mat_dim_x = math::CreateMatrixDescriptor(x_dim, 0, trans_x);
auto mat_dim_y = math::CreateMatrixDescriptor(y_dim, 0, trans_y);
if (mat_dim_x.batch_size_ == 0 && mat_dim_y.batch_size_ == 0) {
out->Resize({mat_dim_x.height_, mat_dim_y.width_});
} else {
out->Resize({std::max(mat_dim_x.batch_size_, mat_dim_y.batch_size_),
mat_dim_x.height_, mat_dim_y.width_});
}
ReshapeTensorIntoMatrixSequence(x, mat_dim_x);
ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
}
template <typename DeviceContext, typename T>
class MatMulXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto *x = context.Input<framework::Tensor>("X");
auto *y = context.Input<framework::Tensor>("Y");
auto *out = context.Output<framework::Tensor>("Out");
out->mutable_data<T>(context.GetPlace());
auto mat_dim_a = math::CreateMatrixDescriptor(
RowMatrixFromVector(x->dims()), 0, context.Attr<bool>("transpose_X"));
auto mat_dim_b =
math::CreateMatrixDescriptor(ColumnMatrixFromVector(y->dims()), 0,
context.Attr<bool>("transpose_Y"));
PADDLE_ENFORCE_EQ(
mat_dim_a.width_, mat_dim_b.height_,
platform::errors::InvalidArgument("Shape mistake in matmul_op"));
PADDLE_ENFORCE_EQ(
mat_dim_a.batch_size_, mat_dim_b.batch_size_,
platform::errors::InvalidArgument("Shape mistake in matmul_op"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto &dev_ctx = context.template device_context<DeviceContext>();
float *data_c = out->data<T>();
if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
int r =
xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_,
alpha, x->data<T>(), y->data<T>(), 0.0f, data_c);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
} else {
// batch matmul
int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_,
mat_dim_b.trans_, mat_dim_a.batch_size_,
mat_dim_a.height_, mat_dim_b.width_,
mat_dim_a.width_, alpha, x->data<T>(),
y->data<T>(), data_c, nullptr, nullptr);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
}
};
// Reshape a rank-3 tensor from P x M x N to M x (P * N).
// (Warning: This requires transposing data and writes into new memory.)
// Identity op if the tensor is not of rank 3.
template <typename DeviceContext, typename T>
static framework::Tensor XPUFoldHeadAndLastDims(
const DeviceContext &context, const framework::Tensor &input) {
auto in_dims = input.dims();
if (in_dims.size() != 3) {
return input;
}
framework::Tensor output;
output.Resize({in_dims[1], in_dims[0], in_dims[2]});
output.mutable_data<T>(context.GetPlace());
std::vector<int> in_shape_host = {static_cast<int>(in_dims[0]),
static_cast<int>(in_dims[1]),
static_cast<int>(in_dims[2])};
std::vector<int> axis_host = {1, 0, 2};
int r = xpu::transpose(context.x_context(), input.data<T>(), output.data<T>(),
in_shape_host.data(), axis_host.data(), /*ndims=*/3);
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
output.Resize({in_dims[1], in_dims[0] * in_dims[2]});
return output;
}
// Using dimensional constraints on matrix multiplication, it is
// straight-forward to check the following table for when X and Y
// are both matrices.
//
// transpose_X | False | True | False | True
// transpose_Y | False | False | True | True
// -----------+----------+----------+----------+-----------
// dX = | dOut Y^T | Y dOut^T | dOut Y | Y^T dOut^T
// dY = | X^T dOut | X dOut | dOut^T X | dOut^T X^T
//
// When X is a vector of size K, we treat it instead as a matrix of shape
// (1, K). Similarly, when Y is a vector of size K, we treat it instead as
// a matrix of shape (K, 1).
//
// When X and Y are both 3-dimensional tensors, then the first dimension,
// the batch dimension, can be ignored and the exact same formulas apply
// as for two matrices.
//
// Finally, when, e.g., X is a 3-dimensional tensor but Y is a matrix, we end
// up with formulas like
//
// dY_{ij} = \sum_{p, m} X_{pmi} dOut_{pmj}
//
// To handle this sort of scenario, we reshape X : P x M x K, dOut: P x M x N
// to X: (P * M) x K, dOut: (P * M) x N.
template <typename DeviceContext, typename T>
class MatMulGradXPUKernel : public framework::OpKernel<T> {
public:
void MatMul(const framework::ExecutionContext &context,
const framework::Tensor &a, bool trans_a,
const framework::Tensor &b, bool trans_b,
framework::Tensor *out) const {
out->mutable_data<T>(context.GetPlace());
auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
PADDLE_ENFORCE_EQ(
mat_dim_a.width_, mat_dim_b.height_,
platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
PADDLE_ENFORCE_EQ(
mat_dim_a.batch_size_, mat_dim_b.batch_size_,
platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto &dev_ctx = context.template device_context<DeviceContext>();
float *data_c = out->data<T>();
if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
int r =
xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_,
alpha, a.data<T>(), b.data<T>(), 0.0f, data_c);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
} else {
// batch matmul
int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_,
mat_dim_b.trans_, mat_dim_a.batch_size_,
mat_dim_a.height_, mat_dim_b.width_,
mat_dim_a.width_, alpha, a.data<T>(),
b.data<T>(), data_c, nullptr, nullptr);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
}
void CalcInputGrad(const framework::ExecutionContext &context,
const framework::Tensor &a, bool trans_a,
bool is_fold_init_dims_a, const framework::Tensor &b,
bool trans_b, bool is_fold_init_dims_b,
framework::Tensor *out) const {
if (out == nullptr) return;
bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) &&
out->dims().size() == 2;
if (!need_combine) {
MatMul(context, a, trans_a, b, trans_b, out);
} else {
auto &dev_ctx = context.template device_context<DeviceContext>();
MatMul(
context, is_fold_init_dims_a
? FoldInitDims(a)
: XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, a),
trans_a, is_fold_init_dims_b
? FoldInitDims(b)
: XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, b),
trans_b, out);
}
}
void Compute(const framework::ExecutionContext &context) const override {
auto x = *context.Input<framework::Tensor>("X");
auto y = *context.Input<framework::Tensor>("Y");
auto dout =
*context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto *dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
bool transpose_x = context.Attr<bool>("transpose_X");
bool transpose_y = context.Attr<bool>("transpose_Y");
ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, transpose_x, transpose_y);
framework::DDim dx_dims;
if (dx) {
dx_dims = dx->dims();
if (dx_dims != x.dims()) {
dx->Resize(x.dims());
}
}
framework::DDim dy_dims;
if (dy) {
dy_dims = dy->dims();
if (dy_dims != y.dims()) {
dy->Resize(y.dims());
}
}
if (transpose_x && transpose_y) {
CalcInputGrad(context, y, true, true, dout, true, false, dx);
CalcInputGrad(context, dout, true, true, x, true, false, dy);
} else if (transpose_x) {
CalcInputGrad(context, y, false, false, dout, true, false, dx);
CalcInputGrad(context, x, false, false, dout, false, true, dy);
} else if (transpose_y) {
CalcInputGrad(context, dout, false, false, y, false, true, dx);
CalcInputGrad(context, dout, true, true, x, false, true, dy);
} else {
CalcInputGrad(context, dout, false, false, y, true, false, dx);
CalcInputGrad(context, x, true, true, dout, false, true, dy);
}
if (dx) {
if (dx_dims != x.dims()) {
dx->Resize(dx_dims);
}
}
if (dy) {
if (dy_dims != y.dims()) {
dy->Resize(dy_dims);
}
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
matmul, ops::MatMulXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
matmul_grad,
ops::MatMulGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
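Two quick numpy spot checks of the logic above (illustrative only): XPUFoldHeadAndLastDims is a transpose(1, 0, 2) followed by a reshape from P x M x N to M x (P * N), and the first column of the gradient table reads dX = dOut Y^T, dY = X^T dOut when neither input is transposed.

import numpy as np

P, M, N, K = 2, 3, 4, 5

# XPUFoldHeadAndLastDims: P x M x N  ->  M x (P * N)
t = np.random.rand(P, M, N)
folded = t.transpose(1, 0, 2).reshape(M, P * N)
assert folded.shape == (M, P * N)

# Gradient table, transpose_X = transpose_Y = False:
X = np.random.rand(M, K)
Y = np.random.rand(K, N)
dOut = np.random.rand(M, N)
dX = dOut @ Y.T   # matches X's shape (M, K)
dY = X.T @ dOut   # matches Y's shape (K, N)
assert dX.shape == X.shape and dY.shape == Y.shape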
@@ -14,11 +14,11 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/operators/mul_op.h"
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "paddle/fluid/operators/mul_op.h"
 namespace paddle {
 namespace operators {
...
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/init.h"
+#include "paddle/fluid/platform/xpu_info.h"
 TEST(InitDevices, CPU) {
   using paddle::framework::InitDevices;
...
@@ -15,9 +15,36 @@
 #pragma once
 #ifdef PADDLE_WITH_XPU
+#include <string>
+#include <unordered_map>
+#include "paddle/fluid/platform/errors.h"
 #include "xpu/api.h"
 #include "xpu/runtime.h"
 #include "xpu/runtime_ex.h"
 namespace xpu = baidu::xpu::api;
+
+class XPUActHelper {
+ public:
+  // Convert string to activation type in xpu
+  static xpu::Activation_t ConvertToXpuActType(
+      const std::string& act_type_str) {
+    static std::unordered_map<std::string, xpu::Activation_t> str2act = {
+        {"linear", xpu::Activation_t::LINEAR},
+        {"relu", xpu::Activation_t::RELU},
+        {"sigmoid", xpu::Activation_t::SIGMOID},
+        {"tanh", xpu::Activation_t::TANH},
+        {"gelu", xpu::Activation_t::GELU},
+        {"leaky_relu", xpu::Activation_t::LEAKY_RELU},
+        {"sqrt", xpu::Activation_t::SQRT},
+        {"square", xpu::Activation_t::SQUARE}};
+    auto res = str2act.find(act_type_str);
+    PADDLE_ENFORCE_NE(res, str2act.end(),
+                      paddle::platform::errors::InvalidArgument(
+                          "Invalid activation type(%s) in XPU", act_type_str));
+    return res->second;
+  }
+};
 #endif
@@ -257,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS
 from .device import get_cudnn_version
 from .device import set_device
 from .device import get_device
+from .device import is_compiled_with_xpu
+from .device import XPUPlace
 # from .tensor.tensor import Tensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensor #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
...
@@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
 __all__ = [
     'get_cudnn_version',
     'set_device',
-    'get_device'
+    'get_device',
+    'XPUPlace',
+    'is_compiled_with_xpu'
     # 'cpu_places',
     # 'CPUPlace',
     # 'cuda_pinned_places',
@@ -35,6 +37,37 @@ __all__ = [
 _cudnn_version = None
+
+
+def is_compiled_with_xpu():
+    """
+    Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun
+
+    Returns (bool): whether paddle was built with WITH_XPU=ON
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            support_xpu = paddle.device.is_compiled_with_xpu()
+    """
+    return core.is_compiled_with_xpu()
+
+
+def XPUPlace(dev_id):
+    """
+    Return a Baidu Kunlun Place
+
+    Parameters:
+        dev_id(int): Baidu Kunlun device id
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            place = paddle.device.XPUPlace(0)
+    """
+    return core.XPUPlace(dev_id)
+
 def get_cudnn_version():
     """
     This function returns the version of cudnn. The return value is an int which represents the
...
@@ -217,6 +217,9 @@ class OpTest(unittest.TestCase):
                 return False
             return True

+        def is_xpu_op_test():
+            return hasattr(cls, "use_xpu") and cls.use_xpu == True
+
         def is_mkldnn_op_test():
             return hasattr(cls, "use_mkldnn") and cls.use_mkldnn == True
@@ -239,6 +242,7 @@ class OpTest(unittest.TestCase):
             if cls.dtype in [np.float32, np.float64] \
                 and cls.op_type not in op_accuracy_white_list.NO_FP64_CHECK_GRAD_OP_LIST \
                 and not hasattr(cls, 'exist_fp64_check_grad') \
+                and not is_xpu_op_test() \
                 and not is_mkldnn_op_test():
                 raise AssertionError(
                     "This test of %s op needs check_grad with fp64 precision." %
@@ -336,6 +340,11 @@ class OpTest(unittest.TestCase):
                 self.attrs["use_mkldnn"] == True):
             self.__class__.use_mkldnn = True

+        if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
+                (hasattr(self, "attrs") and "use_xpu" in self.attrs and \
+                self.attrs["use_xpu"] == True):
+            self.__class__.use_xpu = True
+
         op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
         "infer datatype from inputs and outputs for this test case"
         self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
@@ -932,6 +941,8 @@ class OpTest(unittest.TestCase):
         need_run_ops = self._get_need_run_ops(op_desc)
         res = {}
+        if hasattr(self, 'attrs') and bool(self.attrs.get('use_xpu', False)):
+            return
         for op_desc, father_op_desc in reversed(need_run_ops):
             # The first one is the forward op
             has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type())
@@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase):
                 self.attrs["use_mkldnn"] == True):
             self.__class__.use_mkldnn = True

+        if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
+                (hasattr(self, "attrs") and "use_xpu" in self.attrs and \
+                self.attrs["use_xpu"] == True):
+            self.__class__.use_xpu = True
+
         places = self._get_places()
         for place in places:
             res = self.check_output_with_place(place, atol, no_check_set,
...
@@ -14,6 +14,7 @@
 from __future__ import print_function

+import paddle.fluid.core as core
 import unittest
 import numpy as np
 from op_test import OpTest
...
@@ -18,6 +18,8 @@ import unittest
 import numpy as np
 import paddle
 import paddle.fluid.core as core
+import sys
+sys.path.append("..")
 from op_test import OpTest
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
@@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2):
             no_grad_set=set('Y'))

-@unittest.skipIf(not core.is_compiled_with_xpu(),
-                 "core is not compiled with XPU")
-class TestXPUMulOp1(TestMulOp):
-    def init_dtype_type(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        place = core.XPUPlace(0)
-        self.check_output_with_place(place, atol=1e-1)
-
-    def test_check_grad_normal(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['X', 'Y'], 'Out', max_relative_error=0.5)
-
-    def test_check_grad_ingore_x(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
-
-    def test_check_grad_ingore_y(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
-
-
-@unittest.skipIf(not core.is_compiled_with_xpu(),
-                 "core is not compiled with XPU")
-class TestXPUMulOp2(TestMulOp2):
-    def init_dtype_type(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        place = core.XPUPlace(0)
-        self.check_output_with_place(place, atol=2e-1)
-
-    def test_check_grad_normal(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['X', 'Y'], 'Out', max_relative_error=0.9)
-
-    def test_check_grad_ingore_x(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
-
-    def test_check_grad_ingore_y(self):
-        place = core.XPUPlace(0)
-        self.check_grad_with_place(
-            place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y'))
-
 if __name__ == "__main__":
     unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from scipy.special import expit, erf
import paddle
import paddle.fluid as fluid
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.fluid import compiler, Program, program_guard
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUActivation(OpTest):
def setUp(self):
self.op_type = "exp"
self.init_dtype()
self.init_kernel_type()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.exp(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-3)
def init_kernel_type(self):
pass
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSigmoid(TestXPUActivation):
def setUp(self):
self.op_type = "sigmoid"
self.init_dtype()
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
out = 1 / (1 + np.exp(-x))
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.01)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUTanh(TestXPUActivation):
def setUp(self):
self.op_type = "tanh"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.tanh(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSqrt(TestXPUActivation):
def setUp(self):
self.op_type = "sqrt"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.sqrt(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUAbs(TestXPUActivation):
def setUp(self):
self.op_type = "abs"
self.init_dtype()
x = np.random.uniform(-1, 1, [4, 25]).astype(self.dtype)
# Because we set delta = 0.005 in calculating numeric gradient,
# if x is too small, such as 0.002, x_neg will be -0.003
# x_pos will be 0.007, so the numeric gradient is inaccurate.
# we should avoid this
x[np.abs(x) < 0.005] = 0.02
out = np.abs(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPURelu(TestXPUActivation):
def setUp(self):
self.op_type = "relu"
self.init_dtype()
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
# The same reason with TestAbs
x[np.abs(x) < 0.005] = 0.02
out = np.maximum(x, 0)
self.attrs = {'use_xpu': True}
self.inputs = {'X': x}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUGelu(TestXPUActivation):
def setUp(self):
self.op_type = "gelu"
self.init_dtype()
approximate = False
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
out = gelu(x, approximate)
self.inputs = {'X': x}
self.outputs = {'Out': out}
self.attrs = {"approximate": approximate, 'use_xpu': True}
def gelu(x, approximate):
if approximate:
y_ref = 0.5 * x * (1.0 + np.tanh(
np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))))
else:
y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2)))
return y_ref.astype(x.dtype)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPULog(TestXPUActivation):
def setUp(self):
self.op_type = "log"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.log(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSquare(TestXPUActivation):
def setUp(self):
self.op_type = "square"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.square(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUPow(TestXPUActivation):
def setUp(self):
self.op_type = "pow"
self.init_dtype()
x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype)
out = np.power(x, 3)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.attrs = {'factor': 3.0, 'use_xpu': True}
self.outputs = {'Out': out}
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
class TestElementwiseAddOp(OpTest):
def init_kernel_type(self):
self.use_mkldnn = False
def setUp(self):
self.op_type = "elementwise_add"
self.init_dtype()
self.init_input_output()
self.init_kernel_type()
self.init_axis()
self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
}
self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
self.outputs = {'Out': self.out}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_normal(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['Y'],
'Out',
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X'],
'Out',
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_dtype(self):
self.dtype = np.float64
def init_axis(self):
self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUElementwiseAddOp(OpTest):
def setUp(self):
self.op_type = "elementwise_add"
self.init_dtype()
self.init_input_output()
self.init_axis()
self.inputs = {'X': self.x, 'Y': self.y}
self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True}
self.outputs = {'Out': self.out}
def test_check_output(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad_normal(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['Y'], 'Out')
def test_check_grad_ingore_y(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_dtype(self):
self.dtype = np.float32
def init_axis(self):
self.axis = -1
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.y = np.random.rand(1, 1).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.random((100, )).astype(self.dtype)
self.y = np.random.random((100, )).astype(self.dtype)
self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(100, 1, 1)
def init_axis(self):
self.axis = 0
class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 100, 3).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 100, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1, 100)
class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
self.y = np.random.rand(10, 12).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 10, 12, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
self.y = np.random.rand(100, 1).astype(self.dtype)
self.out = self.x + self.y.reshape(100, 1, 1, 1)
def init_axis(self):
self.axis = 0
class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 3, 12).astype(self.dtype)
self.y = np.random.rand(10, 1, 12).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype)
self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype)
self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 10, 12).astype(self.dtype)
self.y = np.random.rand(10, 12).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 10, 12)
def init_axis(self):
self.axis = 1
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 1).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype)
self.y = np.random.rand(100, 1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype)
self.y = np.random.rand(1, 1, 100).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype)
self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 12).astype(self.dtype)
self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = 2
class TestElementwiseAddOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input of elementwise_add must be Variable.
x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
y1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1)
# the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64
# float16 only can be set on GPU place
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8")
y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8")
self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2)
class TestAddOp(unittest.TestCase):
def test_name(self):
with fluid.program_guard(fluid.Program()):
x = fluid.data(name="x", shape=[2, 3], dtype="float32")
y = fluid.data(name='y', shape=[2, 3], dtype='float32')
y_1 = paddle.add(x, y, name='add_res')
self.assertEqual(('add_res' in y_1.name), True)
def test_declarative(self):
with fluid.program_guard(fluid.Program()):
def gen_data():
return {
"x": np.array([2, 3, 4]).astype('float32'),
"y": np.array([1, 5, 2]).astype('float32')
}
x = fluid.data(name="x", shape=[3], dtype='float32')
y = fluid.data(name="y", shape=[3], dtype='float32')
z = paddle.add(x, y)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
z_value = exe.run(feed=gen_data(), fetch_list=[z.name])
z_expected = np.array([3., 8., 6.])
self.assertEqual((z_value == z_expected).all(), True)
def test_dygraph(self):
with fluid.dygraph.guard():
np_x = np.array([2, 3, 4]).astype('float64')
np_y = np.array([1, 5, 2]).astype('float64')
x = fluid.dygraph.to_variable(np_x)
y = fluid.dygraph.to_variable(np_y)
z = paddle.add(x, y)
np_z = z.numpy()
z_expected = np.array([3., 8., 6.])
self.assertEqual((np_z == z_expected).all(), True)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import paddle.fluid.core as core
import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y):
BATCH_SIZE = 2
M = 3
N = 4
K = 5
if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y):
K = 1
if dim_X == 1:
if transpose_X:
shape_X = [M]
else:
shape_X = [K]
if dim_Y == 1:
if transpose_Y:
shape_Y = [N]
else:
shape_Y = [K]
if dim_X >= 2:
if transpose_X:
shape_X = [K, M]
else:
shape_X = [M, K]
if dim_X == 3:
shape_X = [BATCH_SIZE] + shape_X
if dim_Y >= 2:
if transpose_Y:
shape_Y = [N, K]
else:
shape_Y = [K, N]
if dim_Y == 3:
shape_Y = [BATCH_SIZE] + shape_Y
return shape_X, shape_Y
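# Quick illustration (hypothetical call, not in the original file): with
# dim_X=3, dim_Y=2 and no transposes, X picks up the batch dimension while Y
# stays 2-D, given BATCH_SIZE=2, M=3, N=4, K=5 from the body above:
#     generate_compatible_shapes(3, 2, False, False) == ([2, 3, 5], [5, 4])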
def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
"""Reference forward implementation using np.matmul."""
# np.matmul does not support the transpose flags, so we manually
# transpose X and Y appropriately.
if transpose_X:
if X.ndim == 1:
X = X.reshape((X.size, 1))
elif X.ndim == 2:
X = X.T
else:
dim = [i for i in range(len(X.shape))]
dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1]
X = np.transpose(X, tuple(dim))
if transpose_Y:
if Y.ndim == 1:
Y = Y.reshape((1, Y.size))
else:
dim = [i for i in range(len(Y.shape))]
dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1]
Y = np.transpose(Y, tuple(dim))
Out = np.matmul(X, Y)
if not Out.shape:
# We do not support 0-dimensional Tensors (scalars). So where
# np.matmul outputs a scalar, we must convert to a Tensor of
# shape (1, ) instead.
# Everywhere else, we are compatible with np.matmul.
Out = np.array([Out], dtype="float32")
return Out
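
# Editor's sketch: reference_matmul should agree with an explicit transpose
# followed by np.matmul. Illustrative helper, not part of the original tests.
def _demo_reference_matmul():
    X = np.random.random((5, 3)).astype("float32")
    Y = np.random.random((5, 4)).astype("float32")
    out = reference_matmul(X, Y, transpose_X=True)
    assert np.allclose(out, np.matmul(X.T, Y))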
class Generator(object):
def setUp(self):
self.op_type = "matmul"
X = np.random.random(self.shape_X).astype("float32")
Y = np.random.random(self.shape_Y).astype("float32")
Out = reference_matmul(X, Y, self.transpose_X, self.transpose_Y)
self.inputs = {'X': X, 'Y': Y}
self.attrs = {
'transpose_X': self.transpose_X,
'transpose_Y': self.transpose_Y
}
self.outputs = {'Out': Out}
def test_check_output(self):
self.check_output()
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-3)
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=5e-2)
def test_check_grad_ignore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X"))
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
max_relative_error=5e-2,
no_grad_set=set("X"))
def test_check_grad_ignore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y'))
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
max_relative_error=5e-2,
no_grad_set=set('Y'))
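
# Editor's note (sketch): each check in Generator falls back to the XPU
# place only when Paddle is built with XPU support and X and Y share the
# same rank and batch size, using looser tolerances than the CPU path.
# A minimal guard, with an illustrative helper name:
def _xpu_place_if_available():
    return paddle.XPUPlace(0) if paddle.is_compiled_with_xpu() else None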
class TestMatmulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
            # The input types of matmul_op must be Variable.
input1 = 12
self.assertRaises(TypeError, fluid.layers.matmul, input1, input1)
            # The input dtype of matmul_op must be float32 or float64.
input2 = fluid.layers.data(
name='input2', shape=[10, 10], dtype="int32")
self.assertRaises(TypeError, fluid.layers.matmul, input2, input2)
input3 = fluid.layers.data(
name='input3', shape=[2, 2], dtype="float16")
            # float16 inputs are accepted here, so this call should not raise.
            fluid.layers.matmul(input3, input3)
# Negative dimension generation
def generate_negative_dims(in_shape):
from itertools import combinations
size = len(in_shape)
indexs = list()
shapes = list()
for i in range(size):
indexs.extend(list(combinations([j for j in range(size)], i + 1)))
for idx in indexs:
shapes.append(
[in_shape[i] if i not in idx else -1 for i in range(size)])
return shapes
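
# Editor's sketch: generate_negative_dims enumerates every non-empty subset
# of axes replaced by -1; e.g. for [2, 3] it yields
# [[-1, 3], [2, -1], [-1, -1]].
def _demo_negative_dims():
    assert generate_negative_dims([2, 3]) == [[-1, 3], [2, -1], [-1, -1]]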
# Build programs whose input shapes contain negative (unknown) dimensions
def test_negative_dims_program(obj):
for shape_x in generate_negative_dims(obj.shape_X):
for shape_y in generate_negative_dims(obj.shape_Y):
X = np.random.random(obj.shape_X).astype("float32")
Y = np.random.random(obj.shape_Y).astype("float32")
Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
with program_guard(Program(), Program()):
x = fluid.data(name='x', shape=shape_x, dtype='float32')
y = fluid.data(name='y', shape=shape_y, dtype='float32')
output = fluid.layers.matmul(x, y, obj.transpose_X,
obj.transpose_Y)
obj.assertEqual(len(Ref.shape), len(output.shape))
for idx in range(len(Ref.shape)):
if output.shape[idx] != -1:
obj.assertEqual(Ref.shape[idx], output.shape[idx])
exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(fluid.default_main_program(),
feed={'x': X,
'y': Y},
fetch_list=[output])
                obj.assertTrue(np.allclose(res, Ref, atol=1e-5))
# Generate program API test cases for all negative-dimension possibilities
def api_test(dim_x, dim_y, trans_x, trans_y):
test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim_x, dim_y, trans_x, trans_y))
shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
trans_y)
globals()[test_name] = type(test_name, (unittest.TestCase, ), {
'shape_X': shape_x,
'shape_Y': shape_y,
'transpose_X': trans_x,
'transpose_Y': trans_y,
        'test_negative_dims_program': test_negative_dims_program,
})
# Generate operator test cases for all possibilities
def inject_test(dim_x, dim_y, trans_x, trans_y):
test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim_x, dim_y, trans_x, trans_y))
shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
trans_y)
globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_x,
'shape_Y': shape_y,
'transpose_X': trans_x,
'transpose_Y': trans_y,
})
for dim_X in (1, 2, 3):
for dim_Y in (1, 2, 3):
        for transpose_x in (False, True):
            for transpose_y in (False, True):
                inject_test(dim_X, dim_Y, transpose_x, transpose_y)
                api_test(dim_X, dim_Y, transpose_x, transpose_y)
# Test case n-dim: this redefinition shadows the earlier
# generate_compatible_shapes for the higher-rank cases below.
def generate_compatible_shapes(dim, transpose_X, transpose_Y):
M = 2
N = 4
K = 3
shape_X = [2 for _ in range(dim - 2)]
shape_Y = [2 for _ in range(dim - 2)]
if transpose_X:
shape_X += [K, M]
else:
shape_X += [M, K]
if transpose_Y:
shape_Y += [N, K]
else:
shape_Y += [K, N]
return shape_X, shape_Y
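
# Editor's sketch: for dim == 4 without transposes the helper above pads
# two batch dims of size 2 in front of the [M, K] and [K, N] cores.
def _demo_ndim_shapes():
    assert generate_compatible_shapes(4, False, False) == \
        ([2, 2, 2, 3], [2, 2, 3, 4])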
# Register the n-dim (dim == 4) operator test cases
for dim in [4]:
for transpose_X in [False, True]:
for transpose_Y in [False, True]:
test_name = (
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim, dim, transpose_X, transpose_Y))
shape_X, shape_Y = generate_compatible_shapes(dim, transpose_X,
transpose_Y)
globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_X,
'shape_Y': shape_Y,
'transpose_X': transpose_X,
'transpose_Y': transpose_Y,
})
class API_TestMm(unittest.TestCase):
def test_out(self):
with fluid.program_guard(fluid.Program()):
x = fluid.data(name="x", shape=[2], dtype="float64")
y = fluid.data(name='y', shape=[2], dtype='float64')
result = paddle.mm(x, y)
exe = fluid.Executor(fluid.CPUPlace())
data1 = np.random.rand(2)
data2 = np.random.rand(2)
np_res = exe.run(feed={'x': data1, 'y': data2}, fetch_list=[result])
expected_result = np.matmul(
data1.reshape(1, 2), data2.reshape(2, 1))
            self.assertTrue(
                np.allclose(np_res, expected_result, atol=1e-5),
                "values differ:\n{}\n{}".format(np_res, expected_result))
def test_dygraph_without_out(self):
device = fluid.CPUPlace()
with fluid.dygraph.guard(device):
input_array1 = np.random.rand(3, 4).astype("float64")
input_array2 = np.random.rand(4, 3).astype("float64")
data1 = fluid.dygraph.to_variable(input_array1)
data2 = fluid.dygraph.to_variable(input_array2)
out = paddle.mm(data1, data2)
expected_result = np.matmul(input_array1, input_array2)
self.assertTrue(np.allclose(expected_result, out.numpy()))
class Test_API_Matmul(unittest.TestCase):
def test_dygraph_without_out(self):
device = fluid.CPUPlace()
with fluid.dygraph.guard(device):
input_array1 = np.random.rand(3, 4).astype("float64")
input_array2 = np.random.rand(4, 3).astype("float64")
data1 = fluid.dygraph.to_variable(input_array1)
data2 = fluid.dygraph.to_variable(input_array2)
out = paddle.matmul(data1, data2)
expected_result = np.matmul(input_array1, input_array2)
self.assertTrue(np.allclose(expected_result, out.numpy()))
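
# Editor's sketch: for plain 2-D inputs paddle.mm and paddle.matmul are
# expected to agree; this illustrative helper checks that directly.
def _demo_mm_matmul_agree():
    with fluid.dygraph.guard(fluid.CPUPlace()):
        a = fluid.dygraph.to_variable(np.random.rand(3, 4))
        b = fluid.dygraph.to_variable(np.random.rand(4, 3))
        assert np.allclose(paddle.mm(a, b).numpy(),
                           paddle.matmul(a, b).numpy())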
class API_TestMmError(unittest.TestCase):
def test_errors(self):
def test_error1():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(name="data1", shape=[10, 2], dtype="float32")
data2 = fluid.data(name="data2", shape=[3, 10], dtype="float32")
paddle.mm(data1, data2)
self.assertRaises(ValueError, test_error1)
def test_error2():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(
name="data1", shape=[-1, 10, 2], dtype="float32")
data2 = fluid.data(
name="data2", shape=[-1, 2, 10], dtype="float32")
paddle.mm(data1, data2)
test_error2()
def test_error3():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(
name="data1", shape=[10, 10, 2], dtype="float32")
data2 = fluid.data(
name="data2", shape=[3, 2, 10], dtype="float32")
paddle.mm(data1, data2)
self.assertRaises(ValueError, test_error3)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import sys
sys.path.append("..")
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
class TestMulOp(OpTest):
def setUp(self):
self.op_type = "mul"
self.dtype = np.float64
self.init_dtype_type()
self.inputs = {
'X': np.random.random((20, 5)).astype(self.dtype),
'Y': np.random.random((5, 21)).astype(self.dtype)
}
self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}
def init_dtype_type(self):
pass
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out')
    def test_check_grad_ignore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ignore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
class TestMulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# The input type of mul_op must be Variable.
x1 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace())
x2 = fluid.create_lod_tensor(
np.array([[-1]]), [[1]], fluid.CPUPlace())
self.assertRaises(TypeError, fluid.layers.mul, x1, x2)
# The input dtype of mul_op must be float32 or float64.
x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32")
x4 = fluid.layers.data(name='x4', shape=[4], dtype="int32")
self.assertRaises(TypeError, fluid.layers.mul, x3, x4)
class TestMulOp2(OpTest):
def setUp(self):
self.op_type = "mul"
self.dtype = np.float64
self.init_dtype_type()
self.inputs = {
'X': np.random.random((3, 4, 2, 9)).astype(self.dtype),
'Y': np.random.random((3, 6, 1, 2, 3)).astype(self.dtype)
}
self.attrs = {
'x_num_col_dims': 2,
'y_num_col_dims': 2,
}
result = np.dot(self.inputs['X'].reshape(3 * 4, 2 * 9),
self.inputs['Y'].reshape(3 * 6, 1 * 2 * 3))
result = result.reshape(3, 4, 1, 2, 3)
self.outputs = {'Out': result}
def init_dtype_type(self):
pass
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out')
    def test_check_grad_ignore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X'))
def test_check_grad_ignore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
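
# Editor's sketch: 'x_num_col_dims' / 'y_num_col_dims' flatten X's leading
# dims and Y's trailing dims into an ordinary 2-D matmul, as mirrored by
# the reshape in TestMulOp2.setUp. Illustrative helper only.
def _demo_num_col_dims():
    X = np.random.random((3, 4, 2, 9))     # flattened to (3*4, 2*9)
    Y = np.random.random((3, 6, 1, 2, 3))  # flattened to (3*6, 1*2*3)
    out = np.dot(X.reshape(12, 18), Y.reshape(18, 6)).reshape(3, 4, 1, 2, 3)
    assert out.shape == (3, 4, 1, 2, 3)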
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUMulOp1(TestMulOp):
def init_dtype_type(self):
self.dtype = np.float32
def test_check_output(self):
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-1)
def test_check_grad_normal(self):
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.5)
    def test_check_grad_ignore_x(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ignore_y(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUMulOp2(TestMulOp2):
def init_dtype_type(self):
self.dtype = np.float32
def test_check_output(self):
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=2e-1)
def test_check_grad_normal(self):
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.9)
    def test_check_grad_ignore_x(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ignore_y(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y'))
if __name__ == "__main__":
unittest.main()
...
@@ -407,7 +407,9 @@
         "TransformerDecoder.prepare_incremental_cache",
         "LinearChainCRF.forward",
         "CRFDecoding.forward",
-        "SequenceTagging.forward"
+        "SequenceTagging.forward",
+        "XPUPlace",
+        "is_compiled_with_xpu"
     ],
     "gpu_not_white":[
         "deformable_conv",
...