提交 92462e94 编写于 作者: W Wojciech Uss 提交者: Tao Luo

improve elementwise_add_mkldnn_op test code coverage (#22359)

上级 e6ca512a
......@@ -95,7 +95,13 @@ class ElementwiseOp : public framework::OperatorWithKernel {
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
// If broadcasting is needed, use native implementation
auto CanMKLDNNElementwiseAddBeUsed = [&]() {
return ctx.Input<Tensor>("X")->dims() == ctx.Input<Tensor>("Y")->dims();
};
if (platform::CanMKLDNNBeUsed(ctx) &&
(ctx.Type() != "elementwise_add" || CanMKLDNNElementwiseAddBeUsed())) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
......@@ -227,7 +233,16 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
ctx, framework::GradVarName("Out"));
#ifdef PADDLE_WITH_MKLDNN
if (platform::CanMKLDNNBeUsed(ctx)) {
// If broadcasting is needed, use native implementation
auto CanMKLDNNElementwiseAddGradBeUsed = [&]() {
auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
return (dx != nullptr && dy != nullptr && dx->dims() == dy->dims());
};
if (platform::CanMKLDNNBeUsed(ctx) &&
(ctx.Type() != "elementwise_add_grad" ||
CanMKLDNNElementwiseAddGradBeUsed())) {
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
......
......@@ -41,136 +41,58 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
const T* x_data = x->data<T>();
const T* y_data = y->data<T>();
int axis = ctx.Attr<int>("axis");
auto x_dims = x->dims();
auto y_dims_untrimed = y->dims();
auto z_dims = z->dims();
mkldnn::stream astream(mkldnn_engine);
// Execute the default elementwise_add operator when
// broadcast operations need to be performed.
if (x_dims != y_dims_untrimed) {
Tensor _x;
MKLDNNMemoryFormat format;
auto src_x_tz = framework::vectorize<int64_t>(x_dims);
if ((src_x_tz.size() == 3 &&
x->format() != (format = MKLDNNMemoryFormat::ncw)) ||
(src_x_tz.size() == 4 &&
x->format() != (format = MKLDNNMemoryFormat::nchw)) ||
(src_x_tz.size() == 5 &&
x->format() != (format = MKLDNNMemoryFormat::ncdhw))) {
_x.Resize(x_dims);
mkldnn::memory::data_type in_type = platform::MKLDNNGetDataType<T>();
auto out_format = platform::MKLDNNFormatForSize(
x_dims.size(), MKLDNNMemoryFormat::nchw);
const std::string key =
platform::CreateKey(src_x_tz, x->format(), out_format, in_type);
platform::ReorderMKLDNNHandler handler(src_x_tz, x->type(), in_type,
dev_ctx, mkldnn_engine, key);
auto user_x_memory_p = handler.AcquireSrcMemory(
x->format(), paddle::platform::to_void_cast(x_data));
auto x_memory_p =
handler.AcquireDstMemory(&_x, out_format, ctx.GetPlace());
auto x_reorder = handler.AcquireReorder(x_memory_p, user_x_memory_p);
x_reorder->execute(astream, *user_x_memory_p, *x_memory_p);
astream.wait();
} else {
format = x->format();
_x.ShareDataWith(*x);
}
z->mutable_data<T>(ctx.GetPlace());
auto sum_func = [](T a, T b) -> T { return a + b; };
TransformFunctor<decltype(sum_func), T,
paddle::platform::CPUDeviceContext, T>
functor(
&_x, y, z,
ctx.template device_context<paddle::platform::CPUDeviceContext>(),
sum_func);
PADDLE_ENFORCE_EQ(
x->layout(), DataLayout::kMKLDNN,
platform::errors::InvalidArgument("Wrong layout set for X tensor"));
PADDLE_ENFORCE_NE(
x->format(), MKLDNNMemoryFormat::undef,
platform::errors::InvalidArgument("Wrong format set for X tensor"));
axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
PADDLE_ENFORCE_EQ(
y->layout(), DataLayout::kMKLDNN,
platform::errors::InvalidArgument("Wrong layout set for Y tensor"));
PADDLE_ENFORCE_NE(
y->format(), MKLDNNMemoryFormat::undef,
platform::errors::InvalidArgument("Wrong format set for Y tensor"));
auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post, is_run_common_broadcast;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post,
&is_run_common_broadcast);
if (post == 1) {
functor.RunRowWise(n, pre);
} else {
functor.RunMidWise(n, pre, post);
}
z->set_layout(DataLayout::kMKLDNN);
z->set_format(format);
} else {
PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN,
"Wrong layout set for X tensor");
PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::undef,
"Wrong format set for X tensor");
PADDLE_ENFORCE_EQ(y->layout(), DataLayout::kMKLDNN,
"Wrong layout set for Y tensor");
PADDLE_ENFORCE_NE(y->format(), MKLDNNMemoryFormat::undef,
"Wrong format set for Y tensor");
auto src_x_tz = framework::vectorize<int64_t>(x_dims);
auto src_y_tz = framework::vectorize<int64_t>(y_dims_untrimed);
auto dst_tz = framework::vectorize<int64_t>(z_dims);
std::vector<float> scales = {1.0f, 1.0f};
const std::string key =
platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
auto src_x_memory = handler.AcquireSrcMemory(
{{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
paddle::platform::to_void_cast(x_data));
const T* x_data = x->data<T>();
const T* y_data = y->data<T>();
auto src_y_memory = handler.AcquireSecondSrcMemory(
{{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
paddle::platform::to_void_cast(y_data));
auto src_x_tz = framework::vectorize<int64_t>(x->dims());
auto src_y_tz = framework::vectorize<int64_t>(y->dims());
auto dst_tz = framework::vectorize<int64_t>(z->dims());
auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any);
auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
{src_x_memory, src_y_memory}, scales, dst_md);
std::vector<float> scales = {1.0f, 1.0f};
T* z_data =
z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
const std::string key =
platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
auto sum_prim = handler.AcquireSum();
auto src_x_memory = handler.AcquireSrcMemory(
{{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
paddle::platform::to_void_cast(x_data));
auto src_y_memory = handler.AcquireSecondSrcMemory(
{{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
paddle::platform::to_void_cast(y_data));
auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any);
auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
{src_x_memory, src_y_memory}, scales, dst_md);
T* z_data =
z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
auto sum_prim = handler.AcquireSum();
sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
{MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
{MKLDNN_ARG_DST, *dst_memory}});
astream.wait();
mkldnn::stream astream(mkldnn_engine);
sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
{MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
{MKLDNN_ARG_DST, *dst_memory}});
astream.wait();
z->set_layout(DataLayout::kMKLDNN);
z->set_format(platform::GetMKLDNNFormat(*dst_memory));
}
z->set_layout(DataLayout::kMKLDNN);
z->set_format(platform::GetMKLDNNFormat(*dst_memory));
}
};
......@@ -184,40 +106,23 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
// skip out, x, y,
// dout length is greater than or equal to that of dx, dy.
auto* out = dout;
auto *x = dout, *y = dout;
auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
in->set_layout(DataLayout::kMKLDNN);
in->set_format(out->format());
};
if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
if (dx->dims() == dy->dims()) {
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dx, dout);
}
if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dy, dout);
}
}
} else {
// Execute default kernel when broadcast is needed
x = ctx.Input<Tensor>("X");
y = ctx.Input<Tensor>("Y");
ElemwiseExplicitGradCompute<paddle::platform::CPUDeviceContext, T,
IdentityGrad<T>, IdentityGrad<T>>(
ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
IdentityGrad<T>());
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dx, dout);
}
if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dy, dout);
}
}
};
......
......@@ -15,121 +15,38 @@
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
from paddle.fluid.tests.unittests.test_elementwise_add_op import *
'''
Some tests differ from the tests defined in test_elementwise_add_op.py
because MKLDNN does not support tensors whose number of dimensions is 3.
MKLDNN does not support tensors whose number of dimensions is equal to 3.
Such dimensions cause exceptions in MKLDNN reorder primitive.
The DNNL-based kernel is used only when broadcasting is not required
(see GetExpectedKernelType() methods in elementwise_add_op.h).
'''
class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_kernel_type(self):
self.use_mkldnn = True
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y
def init_kernel_type(self):
self.use_mkldnn = True
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
self.y = np.random.rand(1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_data_format(self):
self.data_format = 'MKLDNN'
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector):
def init_kernel_type(self):
self.use_mkldnn = True
def init_dtype(self):
self.dtype = np.float32
class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0):
class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(100, 1, 1, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1):
def init_input_output(self):
self.x = np.random.rand(2, 100, 3, 4).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 100, 1, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2):
def init_input_output(self):
self.x = np.random.rand(2, 2, 3, 100).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1, 1, 100)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3):
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4):
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_rowwise_add_0(
TestElementwiseAddOp_rowwise_add_0):
def init_input_output(self):
self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
self.y = np.random.rand(10, 12).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 10, 12, 1)
def init_kernel_type(self):
self.use_mkldnn = True
class TestMKLDNNElementwiseAddOp_rowwise_add_1(
TestElementwiseAddOp_rowwise_add_1):
def init_kernel_type(self):
self.use_mkldnn = True
self.x = np.random.random((100, )).astype(self.dtype)
self.y = np.random.random((100, )).astype(self.dtype)
self.out = np.add(self.x, self.y)
class TestMKLDNNElementwiseAddOp_channelwise_add(
TestElementwiseAddOp_channelwise_add):
class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3, 3).astype(self.dtype)
self.y = np.random.rand(100, 1, 1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_kernel_type(self):
self.use_mkldnn = True
self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
self.out = np.add(self.x, self.y)
if __name__ == '__main__':
......
......@@ -27,8 +27,6 @@ class TestElementwiseAddOp(OpTest):
def setUp(self):
self.op_type = "elementwise_add"
self.dtype = np.float64
self.axis = -1
self.init_dtype()
self.init_input_output()
self.init_kernel_type()
......@@ -78,10 +76,10 @@ class TestElementwiseAddOp(OpTest):
self.out = np.add(self.x, self.y)
def init_dtype(self):
pass
self.dtype = np.float64
def init_axis(self):
pass
self.axis = -1
@unittest.skipIf(not core.is_compiled_with_cuda(),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册