From bf481550862d45eff1e5b968595f23959fe048bb Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Mon, 9 May 2022 09:27:29 +0200 Subject: [PATCH] [Ready to merge] oneDNN NHWC matmul & elementwise kernels fixes (#42506) * - fix to crash - more fixes - added diagnostic - matmul output fixes. - compilation fix - stop rotating too small shapes * - Added enabling of matmul_V2 onednn test --- .../operators/elementwise/elementwise_op.h | 5 ++-- paddle/fluid/operators/matmul_op.cc | 28 +++++++++++++++++++ paddle/fluid/operators/matmul_v2_op.cc | 16 +++++++++++ paddle/fluid/platform/mkldnn_helper.h | 22 ++++++++++----- .../mkldnn/test_matmul_v2_mkldnn_op.py | 2 ++ 5 files changed, 64 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index adc0842fb38..95753bb3363 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -103,11 +103,12 @@ class ElementwiseOp : public framework::OperatorWithKernel { std::vector out_dims_array(max_dim); #ifdef PADDLE_WITH_MKLDNN // (jczaja): Broadcasting of dims has to be done on Paddle shapes (NHWC) - // if model is using NHWC. 
+ // if model is using NHWC and any of the shapes is at least 3D bool should_rotate = ctx->IsRunMKLDNNKernel() && (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == - framework::DataLayout::kNHWC); + framework::DataLayout::kNHWC) && + (x_dims.size() >= 3 || y_dims.size() >= 3); if (should_rotate) { // Pick bigger shape and rotate this one bool x_over_y = (x_dims.size() > y_dims.size()); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 0811407466d..2540af5d472 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -585,6 +585,19 @@ class MatMulOp : public framework::OperatorWithKernel { auto dim_x = GetDimForInput(*context, "X"); auto dim_y = GetDimForInput(*context, "Y"); + +#ifdef PADDLE_WITH_MKLDNN + // (jczaja): For NHWC execution output shape needs + // to be computed as y*x instead of x*y + bool channelwise_onednn = + context->IsRunMKLDNNKernel() && + (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == + framework::DataLayout::kNHWC); + if (channelwise_onednn) { + std::swap(dim_x, dim_y); + } +#endif + auto mat_dim_x = phi::funcs::CreateMatrixDescriptor( RowMatrixFromVector(dim_x), 0, context->Attrs().Get("transpose_X")); @@ -770,6 +783,21 @@ class MatMulOp : public framework::OperatorWithKernel { framework::TransToProtoVarType(tensor.dtype()), tensor.place(), tensor.layout()); } else { +#ifdef PADDLE_WITH_MKLDNN + // When matmul is first oneDNN op in a chain (there was some non oneDNN op + // previously) + // then we also need to rotate shape NHWC -> NCHW + if ((expected_kernel_type.data_layout_ == + framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN) && + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout() == + framework::DataLayout::kNHWC) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), + framework::DataLayout::kNHWC); + } +#endif 
return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); } } diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index 01fa01e3c6e..55294331a9c 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -274,6 +274,22 @@ class MatMulV2Op : public framework::OperatorWithKernel { framework::TransToProtoVarType(tensor.dtype()), tensor.place(), tensor.layout()); } else { +#ifdef PADDLE_WITH_MKLDNN + // When matmul_v2 is first oneDNN op in a chain (there was some non oneDNN + // op + // previously) + // then we also need to rotate shape NHWC -> NCHW + if ((expected_kernel_type.data_layout_ == + framework::DataLayout::kMKLDNN) && + (tensor.layout() != framework::DataLayout::kMKLDNN) && + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout() == + framework::DataLayout::kNHWC) { + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), + framework::DataLayout::kNHWC); + } +#endif return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); } } diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 17736a87409..94c0124440e 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -78,13 +78,6 @@ tf_pd MKLDNNBwdPrimitiveDesc(const Engine& e, const Primitive& p, inline void MatchShapeToLayout(framework::Tensor* tensor_in, framework::DataLayout from, framework::DataLayout to) { - // In these data layouts, channel dimension is either on 2nd position: nChw or - // at last nhwC, so for dim==2 these layouts are the same and nothing should - // be done. Similarly for dim==1 when you have just one possible combination. 
- if (tensor_in->dims().size() < 3) { - return; - } - auto print_dims = [](const std::vector& dims) { std::ostringstream oss; @@ -101,6 +94,15 @@ inline void MatchShapeToLayout(framework::Tensor* tensor_in, return oss.str(); }; + // In these data layouts, channel dimension is either on 2nd position: nChw or + // at last nhwC, so for dim==2 these layouts are the same and nothing should + // be done. Similarly for dim==1 when you have just one possible combination. + if (tensor_in->dims().size() < 3) { + VLOG(3) << "Keeping kMKLDNN/kNHWC/kNDHWC output_shape" + << print_dims(phi::vectorize(tensor_in->dims())); + return; + } + switch (from) { case framework::DataLayout::kMKLDNN: if ((to == framework::DataLayout::kNHWC) || @@ -571,6 +573,12 @@ inline void RegisterModelLayout( std::vector>& ops, const platform::Place& place) { if (platform::is_cpu_place(place)) { + // If there is already registered NHWC then quit this call + // not to overwrite setting with analysis of internal "while" op block + if (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() == + framework::DataLayout::kNHWC) + return; + VLOG(4) << "RegisterModelLayout for mkldnn"; auto check_attrib = [](std::unique_ptr& op, const std::string& attrib_name) -> bool { diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py index 25701b797ec..4e59e41b608 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py @@ -67,6 +67,8 @@ class TestMatMulV2VectorXVectorOneDNNOp(OpTest): self.y_shape = (100, ) self.trans_x = False self.trans_y = False + self._cpu_only = True + self.use_mkldnn = True def set_inputs(self, x, y): self.inputs = {'X': x, 'Y': y} -- GitLab