From 365e6cfd15e64e381d64ff8554ca8b08ff7f33cc Mon Sep 17 00:00:00 2001 From: dengkaipeng Date: Tue, 5 Mar 2019 07:35:42 +0000 Subject: [PATCH] add mkldnn support. test=develop --- paddle/fluid/API.spec | 2 +- .../operators/mkldnn/softmax_mkldnn_op.cc | 79 ++++++++----------- .../mkldnn/test_softmax_mkldnn_op.py | 24 ++++++ .../fluid/tests/unittests/test_softmax_op.py | 12 ++- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 66fc323e6b..251b1673a9 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -86,7 +86,7 @@ paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b')) paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8')) paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4')) -paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b')) +paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'f19dd380864e61134ce3814e4be0de4b')) paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), 
('document', 'bbd84e855e660cd1084bb71a2fd0cdaa')) paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95')) diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 4e4f482987..cff8cdd8f5 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -131,29 +131,22 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { if (axis != -1 && axis != rank - 1) { X_trans.mutable_data(framework::make_ddim(shape), ctx.GetPlace()); Out_trans.mutable_data(framework::make_ddim(shape), ctx.GetPlace()); - TransCompute(rank, dev_ctx, *X, &X_trans, perm); - TransCompute(rank, dev_ctx, *Out, &Out_trans, perm); - X_2d = framework::ReshapeToMatrix(X_trans, rank - 1); - Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1); + TransCompute(rank, dev_ctx, *X, &X_trans, perm); + TransCompute(rank, dev_ctx, *Out, &Out_trans, perm); + auto dims = X_trans.dims(); + auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); + X_2d.ShareDataWith(X_trans).Resize(flattened_dims); + Out_2d.ShareDataWith(Out_trans).Resize(flattened_dims); } else { - X_2d = framework::ReshapeToMatrix(*X, rank - 1); - Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); + auto dims = X->dims(); + auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); + X_2d.ShareDataWith(*X).Resize(flattened_dims); + Out_2d.ShareDataWith(*Out).Resize(flattened_dims); } - // flatten input and 
output to 2-D matrixs - // auto dims = input->dims(); // input and output share the same shape - // auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); - // framework::Tensor flattened_input; - // framework::Tensor flattened_output; - // flattened_input.ShareDataWith(*input).Resize(flattened_dims); - // flattened_output.ShareDataWith(*output).Resize(flattened_dims); - - // const T* input_data = flattened_input.data(); - // T* output_data = flattened_output.mutable_data(ctx.GetPlace()); const T* input_data = X_2d.data(); T* output_data = Out_2d.mutable_data(ctx.GetPlace()); - // std::vector src_tz = paddle::framework::vectorize2int(flattened_dims); std::vector src_tz = paddle::framework::vectorize2int(X_2d.dims()); std::vector dst_tz = src_tz; // Same memory descriptor to be used for input and output @@ -184,10 +177,16 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { // We cannot use softmax_dst_memory_p to get prim desc as // it contains flattened dims (2D) while output tensor can // have 2,3,4+ dims - auto output_mem_pd = paddle::platform::create_prim_desc_from_dims( - paddle::framework::vectorize2int(output->dims()), - mkldnn::memory::format::blocked); - output->set_mkldnn_prim_desc(output_mem_pd); + if (axis != -1 && axis != rank - 1) { + auto output_mem_pd = paddle::platform::create_prim_desc_from_dims( + shape, mkldnn::memory::format::blocked); + Out_trans.set_mkldnn_prim_desc(output_mem_pd); + } else { + auto output_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(Out->dims()), + mkldnn::memory::format::blocked); + Out->set_mkldnn_prim_desc(output_mem_pd); + } std::vector pipeline{ *(static_cast(softmax_p.get()))}; @@ -203,7 +202,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { } if (axis != -1 && axis != rank - 1) { - TransCompute(rank, dev_ctx, Out_trans, Out, perm); + TransCompute(rank, dev_ctx, Out_trans, Out, perm); } } }; @@ -242,30 +241,22 @@ class 
SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { dX_trans.mutable_data(framework::make_ddim(shape), ctx.GetPlace()); Out_trans.mutable_data(framework::make_ddim(shape), ctx.GetPlace()); dOut_trans.mutable_data(framework::make_ddim(shape), ctx.GetPlace()); - TransCompute(rank, dev_ctx, *dX, &dX_trans, perm); - TransCompute(rank, dev_ctx, *Out, &Out_trans, perm); - TransCompute(rank, dev_ctx, *dOut, &dOut_trans, perm); - dX_2d = framework::ReshapeToMatrix(dX_trans, rank - 1); - Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1); - dOut_2d = framework::ReshapeToMatrix(dOut_trans, rank - 1); + TransCompute(rank, dev_ctx, *dX, &dX_trans, perm); + TransCompute(rank, dev_ctx, *Out, &Out_trans, perm); + TransCompute(rank, dev_ctx, *dOut, &dOut_trans, perm); + auto dims = dX_trans.dims(); + auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); + dX_2d.ShareDataWith(dX_trans).Resize(flattened_dims); + Out_2d.ShareDataWith(Out_trans).Resize(flattened_dims); + dOut_2d.ShareDataWith(dOut_trans).Resize(flattened_dims); } else { - dX_2d = framework::ReshapeToMatrix(*dX, rank - 1); - Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); - dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1); + auto dims = dX->dims(); + auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); + dX_2d.ShareDataWith(*dX).Resize(flattened_dims); + Out_2d.ShareDataWith(*Out).Resize(flattened_dims); + dOut_2d.ShareDataWith(*dOut).Resize(flattened_dims); } - // auto dims = dout->dims(); // input and output share the same shape - // auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); - // framework::Tensor flattened_output; - // framework::Tensor flattened_dout; - // framework::Tensor flattened_dx; - // flattened_output.ShareDataWith(*output).Resize(flattened_dims); - // flattened_dout.ShareDataWith(*dout).Resize(flattened_dims); - // flattened_dx.ShareDataWith(*dx).Resize(flattened_dims); - - // const T* dst_data = 
flattened_output.data(); - // const T* diff_dst_ptr = flattened_dout.template data(); - // T* diff_src_ptr = flattened_dx.template mutable_data(ctx.GetPlace()); const T* dst_data = Out_2d.data(); const T* diff_dst_ptr = dOut_2d.template data(); T* diff_src_ptr = dX_2d.template mutable_data(ctx.GetPlace()); @@ -317,7 +308,7 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { stream(stream::kind::eager).submit(pipeline).wait(); if (axis != -1 && axis != rank - 1) { - TransCompute(rank, dev_ctx, dX_trans, dX, perm); + TransCompute(rank, dev_ctx, dX_trans, dX, perm); } } }; diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py index 748b77f2bf..3cf05d5d9f 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py @@ -32,6 +32,30 @@ class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp): return [2, 3, 4, 5] +class TestSoftmaxMKLDNNOp3(TestSoftmaxMKLDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 0 + + +class TestSoftmaxMKLDNNOp4(TestSoftmaxMKLDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 1 + + +class TestSoftmaxMKLDNNOp5(TestSoftmaxMKLDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + def get_axis(self): + return 2 + + # Check if primitives already exist in backward class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 084fa869e3..2e779270f0 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -131,13 +131,23 @@ class TestSoftmaxCUDNNOp3(TestSoftmaxCUDNNOp): def get_x_shape(self): return [2, 3, 4, 5] + def get_axis(self): + return 0 + + 
+@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestSoftmaxCUDNNOp4(TestSoftmaxCUDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + def get_axis(self): return 1 @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") -class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp): +class TestSoftmaxCUDNNOp5(TestSoftmaxCUDNNOp): def get_x_shape(self): return [2, 3, 4, 5] -- GitLab