Commit 92462e94 authored by Wojciech Uss, committed by Tao Luo

improve elementwise_add_mkldnn_op test code coverage (#22359)

Parent e6ca512a
@@ -95,7 +95,13 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 #ifdef PADDLE_WITH_MKLDNN
-    if (platform::CanMKLDNNBeUsed(ctx)) {
+    // If broadcasting is needed, use native implementation
+    auto CanMKLDNNElementwiseAddBeUsed = [&]() {
+      return ctx.Input<Tensor>("X")->dims() == ctx.Input<Tensor>("Y")->dims();
+    };
+    if (platform::CanMKLDNNBeUsed(ctx) &&
+        (ctx.Type() != "elementwise_add" || CanMKLDNNElementwiseAddBeUsed())) {
       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
@@ -227,7 +233,16 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
         ctx, framework::GradVarName("Out"));
 #ifdef PADDLE_WITH_MKLDNN
-    if (platform::CanMKLDNNBeUsed(ctx)) {
+    // If broadcasting is needed, use native implementation
+    auto CanMKLDNNElementwiseAddGradBeUsed = [&]() {
+      auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+      auto dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+      return (dx != nullptr && dy != nullptr && dx->dims() == dy->dims());
+    };
+    if (platform::CanMKLDNNBeUsed(ctx) &&
+        (ctx.Type() != "elementwise_add_grad" ||
+         CanMKLDNNElementwiseAddGradBeUsed())) {
       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
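Note: both hunks above add the same dispatch gate, selecting the MKLDNN kernel for elementwise_add (and its grad) only when no broadcasting is needed. Below is a minimal Python sketch of that condition; the function name and arguments are illustrative, not Paddle API, and only the shape test mirrors the C++ lambdas.

    # Illustrative sketch of the kernel-dispatch gate added above; the helper
    # name is hypothetical, only the shape condition mirrors the C++ code.
    def use_mkldnn_kernel(op_type, x_shape, y_shape, mkldnn_available=True):
        # Other elementwise ops keep the old behavior; elementwise_add
        # additionally requires identical dims (i.e. no broadcasting).
        same_dims = list(x_shape) == list(y_shape)
        return mkldnn_available and (op_type != "elementwise_add" or same_dims)

    assert use_mkldnn_kernel("elementwise_add", (2, 3, 4, 5), (2, 3, 4, 5))
    assert not use_mkldnn_kernel("elementwise_add", (2, 3, 4, 5), (100,))
    assert use_mkldnn_kernel("elementwise_mul", (2, 3, 4, 5), (100,))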
@@ -41,136 +41,58 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
     auto* z = ctx.Output<Tensor>("Out");
-    const T* x_data = x->data<T>();
-    const T* y_data = y->data<T>();
-    int axis = ctx.Attr<int>("axis");
-    auto x_dims = x->dims();
-    auto y_dims_untrimed = y->dims();
-    auto z_dims = z->dims();
-    mkldnn::stream astream(mkldnn_engine);
-    // Execute default elementwise_add operator when
-    // broadcast operations need to performed.
-    if (x_dims != y_dims_untrimed) {
-      Tensor _x;
-      MKLDNNMemoryFormat format;
-      auto src_x_tz = framework::vectorize<int64_t>(x_dims);
-      if ((src_x_tz.size() == 3 &&
-           x->format() != (format = MKLDNNMemoryFormat::ncw)) ||
-          (src_x_tz.size() == 4 &&
-           x->format() != (format = MKLDNNMemoryFormat::nchw)) ||
-          (src_x_tz.size() == 5 &&
-           x->format() != (format = MKLDNNMemoryFormat::ncdhw))) {
-        _x.Resize(x_dims);
-        mkldnn::memory::data_type in_type = platform::MKLDNNGetDataType<T>();
-        auto out_format = platform::MKLDNNFormatForSize(
-            x_dims.size(), MKLDNNMemoryFormat::nchw);
-        const std::string key =
-            platform::CreateKey(src_x_tz, x->format(), out_format, in_type);
-        platform::ReorderMKLDNNHandler handler(src_x_tz, x->type(), in_type,
-                                               dev_ctx, mkldnn_engine, key);
-        auto user_x_memory_p = handler.AcquireSrcMemory(
-            x->format(), paddle::platform::to_void_cast(x_data));
-        auto x_memory_p =
-            handler.AcquireDstMemory(&_x, out_format, ctx.GetPlace());
-        auto x_reorder = handler.AcquireReorder(x_memory_p, user_x_memory_p);
-        x_reorder->execute(astream, *user_x_memory_p, *x_memory_p);
-        astream.wait();
-      } else {
-        format = x->format();
-        _x.ShareDataWith(*x);
-      }
-      z->mutable_data<T>(ctx.GetPlace());
-      auto sum_func = [](T a, T b) -> T { return a + b; };
-      TransformFunctor<decltype(sum_func), T,
-                       paddle::platform::CPUDeviceContext, T>
-          functor(
-              &_x, y, z,
-              ctx.template device_context<paddle::platform::CPUDeviceContext>(),
-              sum_func);
-      axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
-      PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-                     "Axis should be in range [0, x_dims)");
-      auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
-      axis = (y_dims.size() == 0) ? x_dims.size() : axis;
-      int pre, n, post, is_run_common_broadcast;
-      get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post,
-                   &is_run_common_broadcast);
-      if (post == 1) {
-        functor.RunRowWise(n, pre);
-      } else {
-        functor.RunMidWise(n, pre, post);
-      }
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(format);
-    } else {
-      PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for X tensor");
-      PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::undef,
-                        "Wrong format set for X tensor");
-      PADDLE_ENFORCE_EQ(y->layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for Y tensor");
-      PADDLE_ENFORCE_NE(y->format(), MKLDNNMemoryFormat::undef,
-                        "Wrong format set for Y tensor");
-      auto src_x_tz = framework::vectorize<int64_t>(x_dims);
-      auto src_y_tz = framework::vectorize<int64_t>(y_dims_untrimed);
-      auto dst_tz = framework::vectorize<int64_t>(z_dims);
-      std::vector<float> scales = {1.0f, 1.0f};
-      const std::string key =
-          platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
-      platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
-      auto src_x_memory = handler.AcquireSrcMemory(
-          {{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
-          paddle::platform::to_void_cast(x_data));
-      auto src_y_memory = handler.AcquireSecondSrcMemory(
-          {{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
-          paddle::platform::to_void_cast(y_data));
-      auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
-                                 MKLDNNMemoryFormat::any);
-      auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
-          {src_x_memory, src_y_memory}, scales, dst_md);
-      T* z_data =
-          z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
-      auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
-      auto sum_prim = handler.AcquireSum();
-      sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
-                                  {MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
-                                  {MKLDNN_ARG_DST, *dst_memory}});
-      astream.wait();
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(platform::GetMKLDNNFormat(*dst_memory));
-    }
+    PADDLE_ENFORCE_EQ(
+        x->layout(), DataLayout::kMKLDNN,
+        platform::errors::InvalidArgument("Wrong layout set for X tensor"));
+    PADDLE_ENFORCE_NE(
+        x->format(), MKLDNNMemoryFormat::undef,
+        platform::errors::InvalidArgument("Wrong format set for X tensor"));
+    PADDLE_ENFORCE_EQ(
+        y->layout(), DataLayout::kMKLDNN,
+        platform::errors::InvalidArgument("Wrong layout set for Y tensor"));
+    PADDLE_ENFORCE_NE(
+        y->format(), MKLDNNMemoryFormat::undef,
+        platform::errors::InvalidArgument("Wrong format set for Y tensor"));
+    const T* x_data = x->data<T>();
+    const T* y_data = y->data<T>();
+    auto src_x_tz = framework::vectorize<int64_t>(x->dims());
+    auto src_y_tz = framework::vectorize<int64_t>(y->dims());
+    auto dst_tz = framework::vectorize<int64_t>(z->dims());
+    std::vector<float> scales = {1.0f, 1.0f};
+    const std::string key =
+        platform::CreateKey(src_x_tz, ctx.OutputName("Out"));
+    platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
+    auto src_x_memory = handler.AcquireSrcMemory(
+        {{src_x_tz}, platform::MKLDNNGetDataType<T>(), x->format()},
+        paddle::platform::to_void_cast(x_data));
+    auto src_y_memory = handler.AcquireSecondSrcMemory(
+        {{src_y_tz}, platform::MKLDNNGetDataType<T>(), y->format()},
+        paddle::platform::to_void_cast(y_data));
+    auto dst_md = memory::desc({dst_tz}, platform::MKLDNNGetDataType<T>(),
+                               MKLDNNMemoryFormat::any);
+    auto sum_pd = handler.AcquireSumPrimitiveDescriptor(
+        {src_x_memory, src_y_memory}, scales, dst_md);
+    T* z_data =
+        z->mutable_data<T>(ctx.GetPlace(), sum_pd->dst_desc().get_size());
+    auto dst_memory = handler.AcquireDstMemoryFromPrimitive(z_data);
+    auto sum_prim = handler.AcquireSum();
+    mkldnn::stream astream(mkldnn_engine);
+    sum_prim->execute(astream, {{MKLDNN_ARG_MULTIPLE_SRC, *src_x_memory},
+                                {MKLDNN_ARG_MULTIPLE_SRC + 1, *src_y_memory},
+                                {MKLDNN_ARG_DST, *dst_memory}});
+    astream.wait();
+    z->set_layout(DataLayout::kMKLDNN);
+    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
   }
 };
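For reference, the simplified forward kernel now always computes Out as a two-source oneDNN sum primitive with scales {1.0f, 1.0f}; the broadcast path was removed because the dispatch gate above never routes broadcast cases here. A numpy sketch of the arithmetic it performs, assuming equal shapes as the gate guarantees:

    import numpy as np

    # What the simplified kernel computes: Out = 1.0 * X + 1.0 * Y,
    # with X and Y required to have identical shapes.
    x = np.random.uniform(0.1, 1, (2, 3, 4, 5)).astype(np.float32)
    y = np.random.uniform(0.1, 1, (2, 3, 4, 5)).astype(np.float32)
    scales = (1.0, 1.0)
    out = scales[0] * x + scales[1] * y
    np.testing.assert_allclose(out, np.add(x, y), rtol=1e-6)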
@@ -184,40 +106,23 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-    int axis = ctx.Attr<int>("axis");
-    // skip out, x, y,
-    // dout length is larger or equal than dx, dy.
-    auto* out = dout;
-    auto *x = dout, *y = dout;
     auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
       in->set_layout(DataLayout::kMKLDNN);
       in->set_format(out->format());
     };
-    if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
-      if (dx->dims() == dy->dims()) {
-        auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
-        if (dx) {
-          blas.VCOPY(dout->numel(), dout->data<T>(),
-                     dx->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dx, dout);
-        }
-        if (dy) {
-          blas.VCOPY(dout->numel(), dout->data<T>(),
-                     dy->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dy, dout);
-        }
-      }
-    } else {
-      // Execute default kernel when broadcast is needed
-      x = ctx.Input<Tensor>("X");
-      y = ctx.Input<Tensor>("Y");
-      ElemwiseExplicitGradCompute<paddle::platform::CPUDeviceContext, T,
-                                  IdentityGrad<T>, IdentityGrad<T>>(
-          ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad<T>(),
-          IdentityGrad<T>());
-    }
+    auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
+    if (dx) {
+      blas.VCOPY(dout->numel(), dout->data<T>(),
+                 dx->mutable_data<T>(ctx.GetPlace()));
+      set_mkldnn_format(dx, dout);
+    }
+    if (dy) {
+      blas.VCOPY(dout->numel(), dout->data<T>(),
+                 dy->mutable_data<T>(ctx.GetPlace()));
+      set_mkldnn_format(dy, dout);
+    }
   }
 };
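Since the gradient of x + y with respect to either input is the identity, the simplified grad kernel just copies dOut into dX and dY via blas.VCOPY; the broadcast branch was dropped for the same reason as in the forward kernel. A numpy equivalent of that copy:

    import numpy as np

    # Equal-shape gradient of elementwise add: both input grads are a
    # plain copy of the output grad, mirroring the two blas.VCOPY calls.
    dout = np.random.rand(2, 3, 4, 5).astype(np.float32)
    dx = dout.copy()  # dOut -> dX
    dy = dout.copy()  # dOut -> dY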
@@ -15,121 +15,38 @@
 from __future__ import print_function
 import unittest
 import numpy as np
-import paddle.fluid.core as core
-from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
 from paddle.fluid.tests.unittests.test_elementwise_add_op import *
 '''
-Some tests differ from the tests defined in test_elementwise_add_op.py
-because MKLDNN does not support tensors of number of dimensions 3.
+MKLDNN does not support tensors of dimensions number equal to 3.
 Such dimensions cause exceptions in MKLDNN reorder primitive.
+The DNNL-based kernel is used only when broadcasting is not required
+(see GetExpectedKernelType() methods in elementwise_add_op.h).
 '''
 
 
 class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp):
-    def init_input_output(self):
-        self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
-        self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
-        self.out = np.add(self.x, self.y)
+    def init_data_format(self):
+        self.data_format = 'MKLDNN'
 
     def init_kernel_type(self):
         self.use_mkldnn = True
 
+    def init_dtype(self):
+        self.dtype = np.float32
 
-@skip_check_grad_ci(
-    reason="[skip shape check] Use y_shape(1) to test broadcast.")
-class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
-        self.y = np.random.rand(1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-@skip_check_grad_ci(
-    reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
-class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype)
-        self.y = np.random.rand(1, 1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
+class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp):
+    def init_input_output(self):
+        self.x = np.random.random((100, )).astype(self.dtype)
+        self.y = np.random.random((100, )).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
 
-class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0):
-    def init_input_output(self):
-        self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(100, 1, 1, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 100, 3, 4).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 100, 1, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 2, 3, 100).astype(self.dtype)
-        self.y = np.random.rand(100).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 1, 1, 100)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_rowwise_add_0(
-        TestElementwiseAddOp_rowwise_add_0):
-    def init_input_output(self):
-        self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
-        self.y = np.random.rand(10, 12).astype(self.dtype)
-        self.out = self.x + self.y.reshape(1, 10, 12, 1)
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_rowwise_add_1(
-        TestElementwiseAddOp_rowwise_add_1):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestMKLDNNElementwiseAddOp_channelwise_add(
-        TestElementwiseAddOp_channelwise_add):
-    def init_input_output(self):
-        self.x = np.random.rand(100, 2, 3, 3).astype(self.dtype)
-        self.y = np.random.rand(100, 1, 1, 1).astype(self.dtype)
-        self.out = self.x + self.y
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
 
+class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp):
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype)
+        self.out = np.add(self.x, self.y)
 
 if __name__ == '__main__':
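The retained tests all follow one pattern: subclass TestMKLDNNElementwiseAddOp and override init_input_output with equal-shape inputs, since the broadcast cases now run on the native kernel. A hypothetical further case, not part of this commit, would look like:

    import numpy as np

    # Hypothetical example following the pattern above; the class name and
    # shapes are illustrative only.
    class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp):
        def init_input_output(self):
            self.x = np.random.random((16, 12)).astype(self.dtype)
            self.y = np.random.random((16, 12)).astype(self.dtype)
            self.out = np.add(self.x, self.y)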
@@ -27,8 +27,6 @@ class TestElementwiseAddOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_add"
-        self.dtype = np.float64
-        self.axis = -1
         self.init_dtype()
         self.init_input_output()
         self.init_kernel_type()
@@ -78,10 +76,10 @@ class TestElementwiseAddOp(OpTest):
         self.out = np.add(self.x, self.y)
 
     def init_dtype(self):
-        pass
+        self.dtype = np.float64
 
     def init_axis(self):
-        pass
+        self.axis = -1
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
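This last change moves the dtype and axis defaults out of setUp() into the overridable init_dtype()/init_axis() hooks, which is what lets the MKLDNN subclass above select np.float32. A hypothetical subclass illustrating the hook:

    import numpy as np

    # Hypothetical subclass, for illustration only: the base class now sets
    # its defaults in init_dtype()/init_axis() rather than directly in setUp().
    class TestElementwiseAddOpFP32(TestElementwiseAddOp):
        def init_dtype(self):
            self.dtype = np.float32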