From 9e2ba9b90906d983131205ddeaa613ec60a7b5ec Mon Sep 17 00:00:00 2001 From: yeliang2258 <30516196+yeliang2258@users.noreply.github.com> Date: Tue, 29 Nov 2022 17:57:33 +0800 Subject: [PATCH] [cherry-pick] updating mul and matmul with set_mem_desc and fix squeeze_transpose for MKLDNN (#47951) * Fix slice bugs in MKLDNN when input dims are zeros (#46671) * fix slice bugs * fix * update code * fix * update code * updating mul and matmul with set_mem_desc (#45624) * - mul & matmul changes - fix - bs16 correction of strides * - cosmetic fixes * - lint * - fix * - fix * - format -> mem_desc * - fix * - fix * - fix * - fix * - fix * fix squueze_transpose (#47911) Co-authored-by: Jacek Czaja --- .../framework/ir/graph_pattern_detector.cc | 1 + .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 33 ++++++++++--------- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 26 +++++++-------- paddle/fluid/platform/mkldnn_reuse.h | 3 +- paddle/phi/kernels/transfer_layout_kernel.cc | 6 ++-- 5 files changed, 37 insertions(+), 32 deletions(-) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 92756e46ed4..fbf65ca47f3 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1045,6 +1045,7 @@ PDNode *patterns::SeqConvEltAddRelu::operator()( PDNode *patterns::Squeeze2Transpose2::operator()() { auto *squeeze2_op_in = pattern->NewNode(squeeze2_op_in_repr()) ->AsInput() + ->assert_has_n_outputs(1) ->assert_is_op_input("squeeze2", "X"); auto *squeeze2_op = pattern->NewNode(squeeze2_op_repr()) ->assert_is_op("squeeze2") diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index f8c9c9d86a9..000e31aad9a 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -214,10 +214,7 @@ class MatMulMKLDNNHandler } astream.wait(); - 
auto format = - MKLDNNFormatForSize(out->dims().size(), dnnl::memory::format_tag::nchw); - out->set_format(format); - out->set_layout(DataLayout::kMKLDNN); + out->set_mem_desc(dst_memory_p->get_desc().reshape(out->dims())); } std::shared_ptr AcquireDstMemory( @@ -651,10 +648,18 @@ void ExecuteMatMulV2(const ExecutionContext &ctx, auto &astream = MKLDNNDeviceContext::tls().get_stream(); matmul_p->execute(astream, matmul_args); astream.wait(); - auto format = - MKLDNNFormatForSize(out->dims().size(), dnnl::memory::format_tag::nchw); - out->set_format(format); - out->set_layout(DataLayout::kMKLDNN); + + // TODO(jczaja): Explain why int8 format of dst is ABCD and do not need + // permute + if (IsOutputFused(ctx) && !IsInt8()) { + auto axis = ctx.Attr>("fused_transpose_Out"); + auto permuted_md = dst_memory_p->get_desc().permute_axes(axis); + out->set_mem_desc( + permuted_md.reshape(phi::vectorize(out->dims()))); + } else { + out->set_mem_desc( + dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); + } } template @@ -836,8 +841,7 @@ class MatMulV2GradMKLDNNKernel : public paddle::framework::OpKernel { reduction_p->execute(astream, reduction_args); astream.wait(); - dx->set_format(paddle::platform::GetMKLDNNFormat( - dst_memory_p->get_desc().reshape(squeezed_dims))); + dx->set_mem_desc(dst_memory_p->get_desc().reshape(squeezed_dims)); } std::vector ExtendDimsWithOnes(const std::vector &dims, @@ -1119,9 +1123,8 @@ void MatMulGradMKLDNNKernel::ExecuteMatMulGrad( matmul_p->execute(astream, matmul_args); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat( - dst_memory_p->get_desc().reshape(vectorize(out->dims())))); + out->set_mem_desc( + dst_memory_p->get_desc().reshape(vectorize(out->dims()))); } template @@ -1184,13 +1187,13 @@ void MatMulGradMKLDNNKernel::RunKernel(const ExecutionContext &ctx) const { if (dx) { if (dx_dims != x.dims()) { dx->Resize(dx_dims); - dx->set_format(x.format()); + 
dx->set_mem_desc(x.mem_desc()); } } if (dy) { if (dy_dims != y.dims()) { dy->Resize(dy_dims); - dy->set_format(y.format()); + dy->set_mem_desc(y.mem_desc()); } } } diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index e727a4fe9fb..e9150b0c58f 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -221,7 +221,7 @@ class MulPrimitiveFactory { to_void_cast(x_tmp.data())); x_tmp.Resize(data->dims()); - x_tmp.set_format(platform::GetMKLDNNFormat(dst_mdesc)); + x_tmp.set_mem_desc(dst_mdesc); data_matrix = framework::ReshapeToMatrix(x_tmp, num_col_dims); } else { data_matrix = framework::ReshapeToMatrix(*data, num_col_dims); @@ -235,11 +235,7 @@ class MulPrimitiveFactory { const Tensor *in) { x_input_->set_data_handle(to_void_cast(in->data())); output_->set_data_handle(out->mutable_data(ctx.GetPlace())); - - if (out->format() == MKLDNNMemoryFormat::undef) { - auto output_format = platform::GetMKLDNNFormat(*output_); - out->set_format((MKLDNNMemoryFormat)output_format); - } + out->set_mem_desc(output_->get_desc()); } template @@ -272,7 +268,7 @@ class MulPrimitiveFactory { auto buffer_size = dst_desc.get_size(); OT *output_data = output->mutable_data(ctx.GetPlace(), buffer_size); - output->set_format(paddle::platform::GetMKLDNNFormat(dst_desc)); + output->set_mem_desc(dst_desc); return memory(dst_desc, engine_, to_void_cast(output_data)); } @@ -392,9 +388,10 @@ class MulMKLDNNINT8Kernel : public framework::OpKernel { if (out_dims.size() != 2) { out->Resize(out_dims); } - out->set_layout(DataLayout::kMKLDNN); - out->set_format(platform::MKLDNNFormatForSize(out_dims.size(), - MKLDNNMemoryFormat::nchw)); + + auto in_md = dnnl::memory::desc(*dnnl_primitive_desc_query_md( + mul.get_primitive_desc(), dnnl_query_dst_md, 0)); + out->set_mem_desc(in_md.reshape(phi::vectorize(out->dims()))); } }; @@ -442,10 +439,11 @@ class MulMKLDNNKernel : public 
framework::OpKernel { matmul_p->execute(astream, matmul_args); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - // plain output formats are enforced inside handler - out->set_format(platform::MKLDNNFormatForSize( - out->dims().size(), dnnl::memory::format_tag::nchw)); + // This kernel flattens dims, so the unflattened dims must be set in out. + // reshape requires a plain layout, but MatMulV2MKLDNNHandler enforces + // one, so it should work + out->set_mem_desc( + dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); } private: diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 028c2d1426e..221eab737d9 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -301,7 +301,8 @@ class MatMulV2MKLDNNHandler out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; } - if (!IsInt8() && !IsBfloat16() && is_output_fused) { + // TODO(jczaja): Why not for int8?? + if (!IsInt8() && is_output_fused) { out_strides = FakeTransposeStrides(out_ddims); } diff --git a/paddle/phi/kernels/transfer_layout_kernel.cc b/paddle/phi/kernels/transfer_layout_kernel.cc index 25a986ea82f..be232b7c671 100644 --- a/paddle/phi/kernels/transfer_layout_kernel.cc +++ b/paddle/phi/kernels/transfer_layout_kernel.cc @@ -121,8 +121,10 @@ void TransferLayoutMKLDNN(const Context& dev_ctx, OneDNNContext::tls().set_cur_paddle_data_layout(src_layout); } - out->set_layout(DataLayout::ONEDNN); - out->set_format(out_format); + dnnl::memory::desc out_mem_desc(vectorize(out->dims()), + funcs::ToOneDNNDataType(x.dtype()), + out_format); + out->set_mem_desc(out_mem_desc); } else if (src_layout == DataLayout::ONEDNN && dst_layout != DataLayout::ONEDNN) { // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel -- GitLab