From 5d604a6b3050343efe5b62149ebcb06354e2b051 Mon Sep 17 00:00:00 2001 From: Wojciech Uss Date: Wed, 27 Jan 2021 08:43:00 +0100 Subject: [PATCH] - Disabling oneDNN inplace pass (#30588) (#30710) Co-authored-by: Jacek Czaja --- .../inference/api/paddle_pass_builder.cc | 7 +++-- .../operators/mkldnn/activation_mkldnn_op.cc | 6 ++--- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 2 +- .../operators/mkldnn/softmax_mkldnn_op.cc | 21 +++++++++------ paddle/fluid/platform/mkldnn_reuse.h | 27 ++++++++++--------- 5 files changed, 35 insertions(+), 28 deletions(-) diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 2e9b8e0d14..24c3645f99 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -224,12 +224,11 @@ void CpuPassStrategy::EnableMKLDNN() { // "fc_mkldnn_pass", // "fc_act_mkldnn_fuse_pass", "batch_norm_act_fuse_pass", -#ifndef _WIN32 // TODO(intel): Please fix the bug on windows. // https://github.com/PaddlePaddle/Paddle/issues/29710 - "mkldnn_inplace_pass", // This pass should be activated after - // fuses -#endif + // "mkldnn_inplace_pass", // This pass should be activated after + // fuses. Disabled by default due to + // little gain and lots of problems })) { passes_.push_back(pass); } diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 0971be6cfe..5c49e87730 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -99,17 +99,17 @@ void eltwise_forward(const framework::ExecutionContext &ctx, "5, or 6, but now the dimension size is", x->dims().size())); + bool is_inplaced = x->IsSharedBufferWith(*y); auto src_tz = framework::vectorize(x->dims()); auto src_format = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : x->format(); platform::ActivationMKLDNNHandler handler( src_tz, algorithm, alpha, beta, src_format, dev_ctx, ctx.GetPlace(), - ctx.InputName("X")); + ctx.InputName("X"), is_inplaced); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = - x->IsSharedBufferWith(*y) ? src_memory_p : handler.AcquireDstMemory(y); + auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y); auto activation_p = handler.AcquireForwardPrimitive(); mkldnn::stream astream(dev_ctx.GetEngine()); diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 9488a1a440..858e783171 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -127,7 +127,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { strides, ksize); platform::PoolingMKLDNNHandler::ComputeAdaptivePoolParameters( - ctx, paddle::framework::vectorize(in_x->dims()), ksize, strides); + ctx, paddle::framework::vectorize(in_x->dims()), &ksize, &strides); auto& dev_ctx = ctx.template device_context(); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 3eb2e7084a..abe0a55653 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -48,13 +48,17 @@ class SoftmaxMKLDNNHandler const mkldnn::engine mkldnn_engine, platform::Place cpu_place, const Tensor* input, Tensor* output, const int axis, - const std::string uniq_name) + const std::string uniq_name, bool is_inplaced) : platform::MKLDNNHandlerT( dev_ctx, mkldnn_engine, cpu_place, // Softmax may be inplace then uniq_name is no longer unique - platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), - axis, uniq_name)) { + is_inplaced ? platform::CreateKey( + dev_ctx, framework::vectorize(input->dims()), + axis, uniq_name) + : platform::CreateKey( + dev_ctx, framework::vectorize(input->dims()), + uniq_name)) { if (!this->isCached()) { PADDLE_ENFORCE_EQ( input->dims(), output->dims(), @@ -78,7 +82,7 @@ class SoftmaxMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, dims, axis, uniq_name)) { + platform::CreateKey(dev_ctx, dims, uniq_name)) { auto data_softmax_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); auto diff_softmax_md = @@ -98,17 +102,18 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { const Tensor* input = ctx.Input("X"); Tensor* output = ctx.Output("Out"); + bool is_inplaced = input->IsSharedBufferWith(*output); const int axis = CanonicalAxis(ctx.Attr("axis"), input->dims().size()); SoftmaxMKLDNNHandler handler(dev_ctx, mkldnn_engine, ctx.GetPlace(), - input, output, axis, ctx.OutputName("Out")); + input, output, axis, ctx.OutputName("Out"), + is_inplaced); auto softmax_src_memory_p = handler.AcquireSrcMemory(input); // For Inplace src and and dst are the same memory object - auto softmax_dst_memory_p = input->IsSharedBufferWith(*output) - ? softmax_src_memory_p - : handler.AcquireDstMemory(output); + auto softmax_dst_memory_p = + is_inplaced ? softmax_src_memory_p : handler.AcquireDstMemory(output); auto softmax_p = handler.AcquireForwardPrimitive(); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index e884d879ff..42227da3cc 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -601,12 +601,15 @@ class ActivationMKLDNNHandler const MKLDNNMemoryFormat fmt, const platform::MKLDNNDeviceContext& dev_ctx, platform::Place cpu_place, - const std::string& unique_name) + const std::string& unique_name, bool is_inplaced) : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, dims, "a", algorithm, unique_name)) { + is_inplaced + ? platform::CreateKey(dev_ctx, dims, "a", algorithm, + unique_name) + : platform::CreateKey(dev_ctx, dims, "a", unique_name)) { auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training, @@ -624,7 +627,7 @@ class ActivationMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dev_ctx, dims, "a", algorithm, unique_name)) { + platform::CreateKey(dev_ctx, dims, "a", unique_name)) { auto diff_dst_md = platform::MKLDNNMemDesc( dims, platform::MKLDNNGetDataType(), diff_fmt); auto src_md = @@ -813,7 +816,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerTAcquireForwardPrimitiveDescriptor( is_test ? mkldnn::prop_kind::forward_inference @@ -883,22 +886,22 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT& src_tz, std::vector& ksize, - std::vector& strides) { + const std::vector& src_tz, std::vector* ksize, + std::vector* strides) { if (ctx.Attr("adaptive")) { // (jczaja): oneDNN is supporting only unchangable in size pool window PADDLE_ENFORCE_EQ( - src_tz[src_tz.size() - 1] % ksize[1], 0, + src_tz[src_tz.size() - 1] % ksize->at(1), 0, platform::errors::Unimplemented( "Input dim must be divisible by corressponding ksize dim.")); PADDLE_ENFORCE_EQ( - src_tz[src_tz.size() - 2] % ksize[0], 0, + src_tz[src_tz.size() - 2] % ksize->at(0), 0, platform::errors::Unimplemented( "Input dim must be divisible by corressponding ksize dim.")); - ksize[0] = src_tz[src_tz.size() - 2] / ksize[0]; - ksize[1] = src_tz[src_tz.size() - 1] / ksize[1]; - strides[0] = ksize[0]; - strides[1] = ksize[1]; + ksize->at(0) = src_tz[src_tz.size() - 2] / ksize->at(0); + ksize->at(1) = src_tz[src_tz.size() - 1] / ksize->at(1); + strides->at(0) = ksize->at(0); + strides->at(1) = ksize->at(1); } } -- GitLab