diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index a400d27b798e37b47ce2740de8d95165f5155b1a..e6b1f6a1c18c38d94d9e3bc4807de7d8b952d60d 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -169,9 +169,21 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
-    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
-                                       ctx, framework::GradVarName("Out")),
-                                   ctx.GetPlace());
+    auto input_data_type = OperatorWithKernel::IndicateVarDataType(
+        ctx, framework::GradVarName("Out"));
+
+#ifdef PADDLE_WITH_MKLDNN
+    // An extra check that the "use_mkldnn" attribute exists is needed,
+    // because test_reverse_op calls the concat_grad kernel without setting
+    // "use_mkldnn" to any value
+    if (ctx.HasAttr("use_mkldnn") &&
+        this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 
   framework::OpKernelType GetKernelTypeForVar(
diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
index 57a56776736ff9132ded8682f3dc1c8841d30e48..4cc96a48bd26f45cdeb9626c6cdd7936a3ba9818 100644
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -23,6 +23,7 @@ namespace operators {
 
 using framework::DataLayout;
 using framework::Tensor;
+using framework::LoDTensor;
 using mkldnn::memory;
 using mkldnn::primitive;
 using mkldnn::concat;
@@ -149,6 +150,72 @@
     output->set_format(platform::GetMKLDNNFormat(*dst_mem));
   }
 };
+
+template <typename T>
+class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
+ public:
+  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& onednn_engine = dev_ctx.GetEngine();
+
+    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+
+    auto out_var_names = ctx.OutputNames(framework::GradVarName("X"));
+
+    const auto x = ctx.MultiInput<LoDTensor>("X");
+    const auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto dx = ctx.MultiOutput<LoDTensor>(framework::GradVarName("X"));
+
+    for (size_t i = 0; i < dx.size(); ++i) {
+      if (dx[i] != nullptr) {
+        dx[i]->set_lod(x[i]->lod());
+      }
+    }
+
+    int axis = ctx.Attr<int>("axis");
+    if (ctx.HasInput("AxisTensor")) {
+      auto* axis_tensor = ctx.Input<Tensor>("AxisTensor");
+      axis = GetDataFromTensor<int>(axis_tensor)[0];
+    }
+
+    auto dout_vec_dims = framework::vectorize(dout->dims());
+
+    axis = ComputeAxis(axis, dout_vec_dims.size());
+
+    std::vector<int64_t> offset(dout_vec_dims.size(), 0);
+
+    mkldnn::memory::data_type dout_type =
+        framework::ToMKLDNNDataType(dout->type());
+    platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
+                                                   dout_type, onednn_engine);
+    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+        dout->format(), platform::to_void_cast(dout->data<T>()));
+
+    for (size_t i = 0; i < dx.size(); ++i) {
+      if (out_var_names[i] != framework::kEmptyVarName &&
+          dx[i]->numel() != 0UL) {
+        auto dx_vec_dims = framework::vectorize(dx[i]->dims());
+        auto slice_mem_p = reorder_handler.AcquireSubmemory(
+            dx_vec_dims, offset, reorder_src_memory_p);
+
+        auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+            dx[i], dx_vec_dims, dout->format(), ctx.GetPlace());
+        auto reorder_p =
+            reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);
+
+        reorder_p->execute(astream, *slice_mem_p, *reorder_dst_memory_p);
+
+        offset[axis] += dx[i]->dims()[axis];
+
+        dx[i]->set_layout(framework::DataLayout::kMKLDNN);
+        dx[i]->set_format(platform::GetMKLDNNFormat(*reorder_dst_memory_p));
+      }
+    }
+    astream.wait();
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -159,3 +226,7 @@ REGISTER_OP_KERNEL(concat, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::ConcatMKLDNNOpKernel<float>,
                    ops::ConcatMKLDNNOpKernel<paddle::platform::bfloat16>,
                    ops::ConcatMKLDNNOpKernel<int8_t>);
+
+REGISTER_OP_KERNEL(concat_grad, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::ConcatGradMKLDNNOpKernel<float>,
+                   ops::ConcatGradMKLDNNOpKernel<paddle::platform::bfloat16>);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py
index 2b7b2b36afa4fb22c3bdfbb9beb8415f2159d99d..e53afaa57be1c85a18d06719562950c29005a614 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_bf16_mkldnn_op.py
@@ -40,13 +40,28 @@ class TestConcatBf16Op(OpTest):
             'mkldnn_data_type': self.mkldnn_data_type
         }
 
+        self.sections = [self.x0.shape[self.axis]] * 2
+        self.sections[1] += self.x1.shape[self.axis]
+
         self.output = np.concatenate(
             (self.x0, self.x1, self.x2), axis=self.axis).astype(np.uint16)
         self.outputs = {'Out': self.output}
 
+    def calculate_grads(self):
+        self.dout = self.outputs['Out']
+        self.dxs = np.split(self.dout, self.sections, self.axis)
+
     def test_check_output(self):
         self.check_output_with_place(core.CPUPlace())
 
+    def test_check_grad(self):
+        self.calculate_grads()
+        self.check_grad_with_place(
+            core.CPUPlace(), ["x0", "x1", "x2"],
+            "Out",
+            user_defined_grads=[self.dxs[0], self.dxs[1], self.dxs[2]],
+            user_defined_grad_outputs=[self.dout])
+
 
 # --------------------test concat bf16 in with axis 0--------------------
 
     def init_test_data(self):
@@ -61,9 +76,9 @@ class TestConcatBf16Op(OpTest):
         self.axis = 0
 
     def init_shape(self):
-        self.x0_shape = [2, 2, 1, 2]
-        self.x1_shape = [1, 2, 1, 2]
-        self.x2_shape = [3, 2, 1, 2]
+        self.x0_shape = [6, 2, 4, 3]
+        self.x1_shape = [7, 2, 4, 3]
+        self.x2_shape = [8, 2, 4, 3]
 
 
 # --------------------test concat bf16 in with axis 1--------------------
@@ -74,9 +89,9 @@ class TestAxis1Case(TestConcatBf16Op):
         self.axis = 1
 
     def init_shape(self):
-        self.x0_shape = [1, 1, 5, 5]
-        self.x1_shape = [1, 2, 5, 5]
-        self.x2_shape = [1, 3, 5, 5]
+        self.x0_shape = [1, 4, 5, 5]
+        self.x1_shape = [1, 8, 5, 5]
+        self.x2_shape = [1, 6, 5, 5]
 
 
 # --------------------test concat bf16 in with axis 2--------------------
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py
index 4900b42d3618d14c54a8a5beb2b027d7e415d047..7fc8f1d30802cdd309afcf4c5e32fc20ebddac4c 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py
@@ -15,78 +15,90 @@
 from __future__ import print_function
 
 import unittest
-from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3, TestConcatOp4
+import numpy as np
+import struct
-
-class TestMKLDNNConcatOp(TestConcatOp):
-    def setUp(self):
-        super(TestMKLDNNConcatOp, self).setUp()
-        self.attrs["use_mkldnn"] = True
-        self._cpu_only = True
-
-    def test_check_output(self):
-        # TODO(wangzhongpu): support mkldnn op in dygraph mode
-        self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
-
-    def test_check_grad(self):
-        pass
-
-    def init_kernel_type(self):
-        self.use_mkldnn = True
+import paddle.fluid.core as core
+from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
+from paddle import enable_static
 
 
-class TestMKLDNNConcatOp2(TestConcatOp2):
+class TestConcatAxis0OneDNNOp(OpTest):
     def setUp(self):
-        super(TestMKLDNNConcatOp2, self).setUp()
-        self.attrs["use_mkldnn"] = True
-        self._cpu_only = True
+        self.op_type = "concat"
+        self.mkldnn_data_type = "float32"
+        self.init_axis()
+        self.init_shape()
+        self.init_test_data()
+        self.configure_datatype()
+        self.inputs = {'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)]}
+        self.attrs = {
+            'axis': self.axis,
+            'use_mkldnn': True,
+            'mkldnn_data_type': self.mkldnn_data_type
+        }
+
+        self.output = np.concatenate(
+            (self.x0, self.x1, self.x2), axis=self.axis).astype(self.dtype)
+
+        self.outputs = {'Out': self.output}
+
+    def configure_datatype(self):
+        self.mkldnn_data_type = "float32"
+        self.dtype = np.float32
 
     def test_check_output(self):
-        # TODO(wangzhongpu): support mkldnn op in dygraph mode
-        self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
+        self.check_output_with_place(core.CPUPlace())
 
     def test_check_grad(self):
-        pass
+        self.check_grad(['x0'], 'Out')
+        self.check_grad(['x1'], 'Out')
+        self.check_grad(['x2'], 'Out')
 
-    def init_kernel_type(self):
-        self.use_mkldnn = True
+    def init_test_data(self):
+        self.x0 = np.random.random(self.x0_shape).astype(np.float32)
+        self.x1 = np.random.random(self.x1_shape).astype(np.float32)
+        self.x2 = np.random.random(self.x2_shape).astype(np.float32)
 
+    def init_axis(self):
+        self.axis = 0
 
-class TestMKLDNNConcatOp3(TestConcatOp3):
-    def setUp(self):
-        super(TestMKLDNNConcatOp3, self).setUp()
-        self.attrs["use_mkldnn"] = True
-        self._cpu_only = True
+    def init_shape(self):
+        self.x0_shape = [2, 2, 1, 50]
+        self.x1_shape = [1, 2, 1, 50]
+        self.x2_shape = [3, 2, 1, 50]
 
-    def test_check_output(self):
-        # TODO(wangzhongpu): support mkldnn op in dygraph mode
-        self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
 
-    def test_check_grad(self):
-        pass
+class TestConcatAxis1OneDNNOp(TestConcatAxis0OneDNNOp):
+    def init_axis(self):
+        self.axis = 1
 
-    def init_kernel_type(self):
-        self.use_mkldnn = True
+    def init_shape(self):
+        self.x0_shape = [1, 1, 5, 50]
+        self.x1_shape = [1, 2, 5, 50]
+        self.x2_shape = [1, 3, 5, 50]
 
 
-class TestMKLDNNConcatOp4(TestConcatOp4):
-    def setUp(self):
-        super(TestMKLDNNConcatOp4, self).setUp()
-        self.attrs["use_mkldnn"] = True
-        self._cpu_only = True
+class TestConcatAxis2OneDNNOp(TestConcatAxis0OneDNNOp):
+    def init_axis(self):
+        self.axis = 2
 
-    def test_check_output(self):
-        # TODO(wangzhongpu): support mkldnn op in dygraph mode
-        self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
+    def init_shape(self):
+        self.x0_shape = [2, 3, 4, 50]
+        self.x1_shape = [2, 3, 5, 50]
+        self.x2_shape = [2, 3, 6, 50]
 
-    def test_check_grad(self):
-        pass
 
-    def init_kernel_type(self):
-        self.use_mkldnn = True
+class TestConcatAxis3OneDNNOp(TestConcatAxis0OneDNNOp):
+    def init_axis(self):
+        self.axis = 3
+
+    def init_shape(self):
+        self.x0_shape = [5, 3, 5, 5]
+        self.x1_shape = [5, 3, 5, 6]
+        self.x2_shape = [5, 3, 5, 7]
 
 if __name__ == '__main__':
-    from paddle import enable_static
     enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py
index 10cd774ce04bec2c60cb2e672a20c5f52ae82449..5f936e577a06fd611a149a2501be7bd845cc7905 100644
--- a/python/paddle/fluid/tests/unittests/test_concat_op.py
+++ b/python/paddle/fluid/tests/unittests/test_concat_op.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
-from op_test import OpTest, skip_check_grad_ci
+from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
 import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard, core
 import paddle
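
Reviewer note, not part of the patch: the backward of concat is just a split of dout back into the input shapes along the concat axis. The oneDNN kernel above expresses this as one submemory reorder per input, advancing offset[axis] by dx[i]->dims()[axis] after each slice, and the bf16 test encodes the same arithmetic in self.sections. A minimal NumPy sketch of that bookkeeping (shapes borrowed from TestConcatAxis0OneDNNOp purely for illustration; any shapes that agree on the non-concat axes would do):

    import numpy as np

    axis = 0
    xs = [np.random.random(s).astype(np.float32)
          for s in ([2, 2, 1, 50], [1, 2, 1, 50], [3, 2, 1, 50])]

    out = np.concatenate(xs, axis=axis)
    dout = np.ones_like(out)  # upstream gradient

    # Split points are the cumulative sizes along the concat axis; np.split
    # at these indices undoes the concatenation, exactly the walk the kernel
    # performs with its running offset[axis].
    sections = np.cumsum([x.shape[axis] for x in xs])[:-1]
    dxs = np.split(dout, sections, axis=axis)

    # Each gradient slice matches its input's shape.
    assert all(dx.shape == x.shape for dx, x in zip(dxs, xs))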