diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index ddad70a6a5f31ccb974f78ca35f045c59f45b8be..46201d5e5c9a12bdff92ff6aed53fe9ac81788e2 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -47,13 +47,13 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { float scale_o = ctx.Attr("Scale_out"); int axis = ctx.Attr("axis"); - platform::BinaryMKLDNNHandler handler( - BINARY_OP, axis, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z, - scale_x, scale_y, scale_o, ctx.OutputName("Out")); + platform::BinaryMKLDNNHandler handler( BINARY_OP, axis, mkldnn_engine, ctx.GetPlace(), x, y, z, scale_x, scale_y, scale_o); const auto src_x_memory = handler.AcquireSrcMemory(x); const auto src_y_memory = handler.AcquireSecondSrcMemory(y); - const auto dst_memory = handler.AcquireDstMemory(z); + // For Inplace src and dst are the same memory object + auto dst_memory = + x->IsSharedBufferWith(*z) ? 
src_x_memory : handler.AcquireDstMemory(z); const auto binary_prim = handler.AcquireForwardPrimitive(); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc index 1c246e8d18937087639129d32001a297eec3ca42..508867ed37434eaa536f1819cc83e8b69cb2464b 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc @@ -48,9 +48,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel { if (dx) { // dx = dout*y platform::BinaryMKLDNNHandler handler( - dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine, - ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f, - ctx.InputName(framework::GradVarName("Out"))); + dnnl::algorithm::binary_mul, axis, mkldnn_engine, + ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f); const auto src_dout_memory = handler.AcquireSrcMemory(dout); const auto src_y_memory = handler.AcquireSecondSrcMemory(y); @@ -75,9 +74,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel { // Handler is having nullptr passed instead of output tensor as // we want Dst buffer to be allocated by oneDNN not to use Tensor platform::BinaryMKLDNNHandler handler( - dnnl::algorithm::binary_mul, axis, dev_ctx, mkldnn_engine, - ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f, - ctx.InputName(framework::GradVarName("Out"))); + dnnl::algorithm::binary_mul, axis, mkldnn_engine, + ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f); const auto src_dout_memory = handler.AcquireSrcMemory(dout); const auto src_x_memory = handler.AcquireSecondSrcMemory(x); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 91cc466207217aacc3f8ad4d9f5f544e5b48e0b7..fc647e184cb816b5df685d8c3369d3d9bf9a99bc 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ 
b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -57,9 +57,8 @@ class SoftmaxMKLDNNHandler platform::Place cpu_place, const Tensor* out, const Tensor* out_grad, Tensor* in_x_grad, const std::string& unique_name) - : platform::MKLDNNHandlerT( - dev_ctx, mkldnn_engine, cpu_place) { + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, cpu_place) { PADDLE_ENFORCE_EQ( out_grad->dims(), in_x_grad->dims(), platform::errors::InvalidArgument("The shape of softmax_grad's input " diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index afdd41f706a65d509dc0bb4fed3778a20d05f0f0..f846f7010e095a654c9b78cc2ad8785dacbfbcc6 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -49,18 +49,17 @@ class MKLDNNHandlerNoCachingT { } std::shared_ptr AcquireForwardPrimitive() { - return forward_p = std::make_shared(*fwd_pd_); + return std::make_shared(*fwd_pd_); } std::shared_ptr AcquireBackwardPrimitive() { - return backward_p = std::make_shared(*bwd_pd_); + return std::make_shared(*bwd_pd_); } std::shared_ptr AcquireBackwardWeightsPrimitive() { PADDLE_ENFORCE_NOT_NULL(bwd_w_pd_, platform::errors::Unavailable( "Error: BWD_PD should be set when " - "getting BWD prim witk key: %s .", - key_p)); + "getting BWD prim .")); return std::make_shared(*bwd_w_pd_); } @@ -802,19 +801,13 @@ class MKLDNNHandler { }; template -class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT { +class BinaryMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: BinaryMKLDNNHandler(const dnnl::algorithm algo, const int axis, - const MKLDNNDeviceContext& dev_ctx, const mkldnn::engine engine, platform::Place cpu_place, const Tensor* x, const Tensor* y, Tensor* z, - float scale_x, float scale_y, float scale_z, - const std::string& uniq_name) - : platform::MKLDNNHandlerT( - dev_ctx, engine, cpu_place, - platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), - uniq_name)) { - if (!this->isCached()) { + float 
scale_x, float scale_y, float scale_z) + : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { PADDLE_ENFORCE_EQ( x->layout(), DataLayout::kMKLDNN, platform::errors::InvalidArgument("Wrong layout set for X tensor.")); @@ -858,7 +851,6 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT { auto attributes = CreateAttributes(algo, scale_x, scale_y, scale_z); this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md, src1_md, dst_md); - } } std::shared_ptr AcquireSecondSrcMemory(