Commit 92462e94 authored by Wojciech Uss, committed by Tao Luo

improve elementwise_add_mkldnn_op test code coverage (#22359)

Parent e6ca512a
@@ -95,7 +95,13 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 #ifdef PADDLE_WITH_MKLDNN
-    if (platform::CanMKLDNNBeUsed(ctx)) {
+    // If broadcasting is needed, use native implementation
+    auto CanMKLDNNElementwiseAddBeUsed = [&]() {
+      return ctx.Input<Tensor>("X")->dims() == ctx.Input<Tensor>("Y")->dims();
+    };
+    if (platform::CanMKLDNNBeUsed(ctx) &&
+        (ctx.Type() != "elementwise_add" || CanMKLDNNElementwiseAddBeUsed())) {
       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
@@ -227,7 +233,16 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
         ctx, framework::GradVarName("Out"));
 #ifdef PADDLE_WITH_MKLDNN
-    if (platform::CanMKLDNNBeUsed(ctx)) {
+    // If broadcasting is needed, use native implementation
+    auto CanMKLDNNElementwiseAddGradBeUsed = [&]() {
+      auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+      auto dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+      return (dx != nullptr && dy != nullptr && dx->dims() == dy->dims());
+    };
+    if (platform::CanMKLDNNBeUsed(ctx) &&
+        (ctx.Type() != "elementwise_add_grad" ||
+         CanMKLDNNElementwiseAddGradBeUsed())) {
       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
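Note: both hunks above add the same dispatch gate, selecting the MKLDNN kernel for elementwise_add (and its grad) only when no broadcasting is needed. Below is a minimal Python sketch of that condition; the function name and arguments are illustrative, not Paddle API, and only the shape test mirrors the C++ lambdas.

    # Illustrative sketch of the kernel-dispatch gate added above; the helper
    # name is hypothetical, only the shape condition mirrors the C++ code.
    def use_mkldnn_kernel(op_type, x_shape, y_shape, mkldnn_available=True):
        # Other elementwise ops keep the old behavior; elementwise_add
        # additionally requires identical dims (i.e. no broadcasting).
        same_dims = list(x_shape) == list(y_shape)
        return mkldnn_available and (op_type != "elementwise_add" or same_dims)

    assert use_mkldnn_kernel("elementwise_add", (2, 3, 4, 5), (2, 3, 4, 5))
    assert not use_mkldnn_kernel("elementwise_add", (2, 3, 4, 5), (100,))
    assert use_mkldnn_kernel("elementwise_mul", (2, 3, 4, 5), (100,))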
@@ -41,136 +41,58 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
     auto* z = ctx.Output<Tensor>("Out");
-    const T* x_data = x->data<T>();
-    const T* y_data = y->data<T>();
-    int axis = ctx.Attr<int>("axis");
-    auto x_dims = x->dims();
-    auto y_dims_untrimed = y->dims();
-    auto z_dims = z->dims();
-    mkldnn::stream astream(mkldnn_engine);
-    // Execute default elementwise_add operator when
-    // broadcast operations need to performed.
-    if (x_dims != y_dims_untrimed) {
-      Tensor _x;
-      MKLDNNMemoryFormat format;
-      auto src_x_tz = framework::vectorize<int64_t>(x_dims);
-      if ((src_x_tz.size() == 3 &&
-           x->format() != (format = MKLDNNMemoryFormat::ncw)) ||
-          (src_x_tz.size() == 4 &&
-           x->format() != (format = MKLDNNMemoryFormat::nchw)) ||
-          (src_x_tz.size() == 5 &&
-           x->format() != (format = MKLDNNMemoryFormat::ncdhw))) {
-        _x.Resize(x_dims);
-        mkldnn::memory::data_type in_type = platform::MKLDNNGetDataType<T>();
-        auto out_format = platform::MKLDNNFormatForSize(
-            x_dims.size(), MKLDNNMemoryFormat::nchw);
-        const std::string key =
-            platform::CreateKey(src_x_tz, x->format(), out_format, in_type);
-        platform::ReorderMKLDNNHandler handler(src_x_tz, x->type(), in_type,
-                                               dev_ctx, mkldnn_engine, key);
-        auto user_x_memory_p = handler.AcquireSrcMemory(
-            x->format(), paddle::platform::to_void_cast(x_data));
-        auto x_memory_p =
-            handler.AcquireDstMemory(&_x, out_format, ctx.GetPlace());
-        auto x_reorder = handler.AcquireReorder(x_memory_p, user_x_memory_p);
-        x_reorder->execute(astream, *user_x_memory_p, *x_memory_p);
-        astream.wait();
-      } else {
-        format = x->format();
-        _x.ShareDataWith(*x);
-      }
-      z->mutable_data<T>(ctx.GetPlace());
-      auto sum_func = [](T a, T b) -> T { return a + b; };
-      TransformFunctor<decltype(sum_func), T,
-                       paddle::platform::CPUDeviceContext, T>
-          functor(
-              &_x, y, z,
-              ctx.template device_context<paddle::platform::CPUDeviceContext>(),
-              sum_func);
-      axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
-      PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-                     "Axis should be in range [0, x_dims)");
-      auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
-      axis = (y_dims.size() == 0) ? x_dims.size() : axis;
-      int pre, n, post, is_run_common_broadcast;
-      get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post,
-                   &is_run_common_broadcast);
-      if (post == 1) {
-        functor.RunRowWise(n, pre);
-      } else {
-        functor.RunMidWise(n, pre, post);
-      }
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(format);
-    } else {
-      PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for X tensor");
-      PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::undef,
-                        "Wrong format set for X tensor");
-      PADDLE_ENFORCE_EQ(y->layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for Y tensor");
-      PADDLE_ENFORCE_NE(y->format(), MKLDNNMemoryFormat::undef,
-                        "Wrong format set for Y tensor");
-      auto src_x_tz = framework::vectorize<int64_t>(x_dims);
-      auto src_y_tz = framework::vectorize<int64_t>(y_dims_untrimed);
-      auto dst_tz = framework::vectorize<int64_t>(z_dims);
-      std::vector<float> scales = {1.0f, 1.0f};
-      const std::string key =
-          platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
-      platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
-      auto src_x_memory = handler.AcquireSrcMemory(
-          {{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
-          paddle::platform::to_void_cast(x_data));
-      auto src_y_memory = handler.AcquireSecondSrcMemory(
-          {{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
-          paddle::platform::to_void_cast(y_data));
-      auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
-                                 MKLDNNMemoryFormat::any);
-      auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
-          {src_x_memory, src_y_memory}, scales, dst_md);
-      T* z_data =
-          z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
-      auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
-      auto sum_prim = handler.AcquireSum();
-      sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
-                                  {MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
-                                  {MKLDNN_ARG_DST, *dst_memory}});
-      astream.wait();
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(platform::GetMKLDNNFormat(*dst_memory));
-    }
+    PADDLE_ENFORCE_EQ(
+        x->layout(), DataLayout::kMKLDNN,
+        platform::errors::InvalidArgument("Wrong layout set for X tensor"));
+    PADDLE_ENFORCE_NE(
+        x->format(), MKLDNNMemoryFormat::undef,
+        platform::errors::InvalidArgument("Wrong format set for X tensor"));
+    PADDLE_ENFORCE_EQ(
+        y->layout(), DataLayout::kMKLDNN,
+        platform::errors::InvalidArgument("Wrong layout set for Y tensor"));
+    PADDLE_ENFORCE_NE(
+        y->format(), MKLDNNMemoryFormat::undef,
+        platform::errors::InvalidArgument("Wrong format set for Y tensor"));
+    const T* x_data = x->data<T>();
+    const T* y_data = y->data<T>();
+    auto src_x_tz = framework::vectorize<int64_t>(x->dims());
+    auto src_y_tz = framework::vectorize<int64_t>(y->dims());
+    auto dst_tz = framework::vectorize<int64_t>(z->dims());
+    std::vector<float> scales = {1.0f, 1.0f};
+    const std::string key =
+        platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
+    platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
+    auto src_x_memory = handler.AcquireSrcMemory(
+        {{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
+        paddle::platform::to_void_cast(x_data));
+    auto src_y_memory = handler.AcquireSecondSrcMemory(
+        {{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
+        paddle::platform::to_void_cast(y_data));
+    auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
+                               MKLDNNMemoryFormat::any);
+    auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
+        {src_x_memory, src_y_memory}, scales, dst_md);
+    T* z_data =
+        z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
+    auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
+    auto sum_prim = handler.AcquireSum();
+    mkldnn::stream astream(mkldnn_engine);
+    sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
+                                {MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
+                                {MKLDNN_ARG_DST, *dst_memory}});
+    astream.wait();
+    z->set_layout(DataLayout::kMKLDNN);
+    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
   }
 };
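For reference, the simplified forward kernel now always computes Out as a two-source oneDNN sum primitive with scales {1.0f, 1.0f}; the broadcast path was removed because the dispatch gate above never routes broadcast cases here. A numpy sketch of the arithmetic it performs, assuming equal shapes as the gate guarantees:

    import numpy as np

    # What the simplified kernel computes: Out = 1.0 * X + 1.0 * Y,
    # with X and Y required to have identical shapes.
    x = np.random.uniform(0.1, 1, (2, 3, 4, 5)).astype(np.float32)
    y = np.random.uniform(0.1, 1, (2, 3, 4, 5)).astype(np.float32)
    scales = (1.0, 1.0)
    out = scales[0] * x + scales[1] * y
    np.testing.assert_allclose(out, np.add(x, y), rtol=1e-6)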
@@ -184,40 +106,23 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-    int axis = ctx.Attr<int>("axis");
-    // skip out, x, y,
-    // dout length is larger or equal than dx, dy.
-    auto* out = dout;
-    auto *x = dout, *y = dout;
     auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
       in->set_layout(DataLayout::kMKLDNN);
       in->set_format(out->format());
     };
-    if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
-      if (dx->dims() == dy->dims()) {
-        auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
-        if (dx) {
-          blas.VCOPY(dout->numel(), dout->data<T>(),
-                     dx->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dx, dout);
-        }
-        if (dy) {
-          blas.VCOPY(dout->numel(), dout->data<T>(),
-                     dy->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dy, dout);
-        }
-      }
-    } else {
-      // Execute default kernel when broadcast is needed
-      x = ctx.Input<Tensor>("X");
-      y = ctx.Input<Tensor>("Y");
-      ElemwiseExplicitGradCompute<paddle::platform::CPUDeviceContext, T,
-                                  IdentityGrad<T>, IdentityGrad<T>>(
-          ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
-          IdentityGrad<T>());
-    }
+    auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
+    if (dx) {
+      blas.VCOPY(dout->numel(), dout->data<T>(),
+                 dx->mutable_data<T>(ctx.GetPlace()));
+      set_mkldnn_format(dx, dout);
+    }
+    if (dy) {
+      blas.VCOPY(dout->numel(), dout->data<T>(),
+                 dy->mutable_data<T>(ctx.GetPlace()));
+      set_mkldnn_format(dy, dout);
+    }
   }
 };
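Since the gradient of x + y with respect to either input is the identity, the simplified grad kernel just copies dOut into dX and dY via blas.VCOPY; the broadcast branch was dropped for the same reason as in the forward kernel. A numpy equivalent of that copy:

    import numpy as np

    # Equal-shape gradient of elementwise add: both input grads are a
    # plain copy of the output grad, mirroring the two blas.VCOPY calls.
    dout = np.random.rand(2, 3, 4, 5).astype(np.float32)
    dx = dout.copy()  # dOut -> dX
    dy = dout.copy()  # dOut -> dY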
@@ -15,121 +15,38 @@
 from __future__ import print_function
 import unittest
 import numpy as np
-import paddle.fluid.core as core
-from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
 from paddle.fluid.tests.unittests.test_elementwise_add_op import *
 '''
-Some tests differ from the tests defined in test_elementwise_add_op.py
-because MKLDNN does not support tensors of number of dimensions 3.
+MKLDNN does not support tensors of dimensions number equal to 3.
 Such dimensions cause exceptions in MKLDNN reorder primitive.
+The DNNL-based kernel is used only when broadcasting is not required
+(see GetExpectedKernelType() methods in elementwise_add_op.h).
 '''
 
 
 class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp):
-    def init_input_output(self):
-        self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
-        self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
-        self.out = np.add(self.x, self.y)
+    def init_data_format(self):
+        self.data_format = 'MKLDNN'
 
     def init_kernel_type(self):
         self.use_mkldnn = True
 
+    def init_dtype(self):
+        self.dtype = np.float32
 
-@skip_check_grad_ci(
-    reason="[skip shape check] Use y_shape(1) to test broadcast.")
-class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
-        self.y = np.random.rand(1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-@skip_check_grad_ci(
-    reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
-class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
-        self.y = np.random.rand(1, 1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
+class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp):
+    def init_input_output(self):
+        self.x = np.random.random((100, )).astype(self.dtype)
+        self.y = np.random.random((100, )).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
 
-class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0):
-    def init_input_output(self):
-        self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(100, 1, 1, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 100, 3, 4).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 100, 1, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 2, 3, 100).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 1, 1, 100)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_rowwise_add_0(
-        TestElementwiseAddOp_rowwise_add_0):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
-        self.y = np.random.rand(10, 12).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 10, 12, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_rowwise_add_1(
-        TestElementwiseAddOp_rowwise_add_1):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_channelwise_add(
-        TestElementwiseAddOp_channelwise_add):
-    def init_input_output(self):
-        self.x = np.random.rand(100, 2, 3, 3).astype(self.dtype)
-        self.y = np.random.rand(100, 1, 1, 1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
 
+class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
 
 if __name__ == '__main__':
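The retained tests all follow one pattern: subclass TestMKLDNNElementwiseAddOp and override init_input_output with equal-shape inputs, since the broadcast cases now run on the native kernel. A hypothetical further case, not part of this commit, would look like:

    import numpy as np

    # Hypothetical example following the pattern above; the class name and
    # shapes are illustrative only.
    class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp):
        def init_input_output(self):
            self.x = np.random.random((16, 12)).astype(self.dtype)
            self.y = np.random.random((16, 12)).astype(self.dtype)
            self.out = np.add(self.x, self.y)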
@@ -27,8 +27,6 @@ class TestElementwiseAddOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_add"
-        self.dtype = np.float64
-        self.axis = -1
         self.init_dtype()
         self.init_input_output()
         self.init_kernel_type()
@@ -78,10 +76,10 @@ class TestElementwiseAddOp(OpTest):
         self.out = np.add(self.x, self.y)
 
     def init_dtype(self):
-        pass
+        self.dtype = np.float64
 
     def init_axis(self):
-        pass
+        self.axis = -1
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
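This last change moves the dtype and axis defaults out of setUp() into the overridable init_dtype()/init_axis() hooks, which is what lets the MKLDNN subclass above select np.float32. A hypothetical subclass illustrating the hook:

    import numpy as np

    # Hypothetical subclass, for illustration only: the base class now sets
    # its defaults in init_dtype()/init_axis() rather than directly in setUp().
    class TestElementwiseAddOpFP32(TestElementwiseAddOp):
        def init_dtype(self):
            self.dtype = np.float32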