From 47f670d58c0df1d49914366f3cda5232bf59448e Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Wed, 11 Sep 2019 11:44:07 +0200 Subject: [PATCH] - Softmax mkl-dnn refactoring (#19615) test=develop - Cosmetic fixes test=develop --- .../operators/mkldnn/softmax_mkldnn_op.cc | 187 ++++++++---------- 1 file changed, 86 insertions(+), 101 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 138edc4d92..f1e711249d 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -38,52 +38,69 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { SoftmaxMKLDNNHandler(const std::vector& dims, const MKLDNNMemoryFormat fmt, const platform::MKLDNNDeviceContext& dev_ctx, - mkldnn::engine engine, const std::string& base_key) - : platform::MKLDNNHandler(dev_ctx, engine, base_key), - dims_(dims), - fmt_(fmt) {} + platform::Place cpu_place, const std::string& uniq_name) + : platform::MKLDNNHandler(dev_ctx, dev_ctx.GetEngine(), + platform::GetHash(dims, uniq_name)), + place_(cpu_place), + fwd_pd_(nullptr), + bwd_pd_(nullptr) { + this->AcquireSoftmaxPrimitiveDescriptor(dims, fmt); + } SoftmaxMKLDNNHandler(const std::vector& dims, const MKLDNNMemoryFormat fmt, const MKLDNNMemoryFormat diff_fmt, const platform::MKLDNNDeviceContext& dev_ctx, - mkldnn::engine engine, const std::string& base_key) - : platform::MKLDNNHandler(dev_ctx, engine, base_key), - dims_(dims), - fmt_(fmt), - diff_fmt_(diff_fmt) { + platform::Place cpu_place, const std::string& uniq_name) + : platform::MKLDNNHandler(dev_ctx, dev_ctx.GetEngine(), + platform::GetHash(dims, uniq_name)), + place_(cpu_place), + fwd_pd_(nullptr), + bwd_pd_(nullptr) { // If we are in Grad operatgor then update a key with BWD suffix to // distinguish from FWD memory primitives // Key_common will allow to access FWD_PD from cache - key_ += "-BWD"; + this->AcquireSoftmaxPrimitiveDescriptor(dims, fmt); + this->AcquireSoftmaxBackwardPrimitiveDescriptor(dims, fmt, diff_fmt); } // TODO(jczaja): Once fwd_pd_ are moved to MKLDNNHandler then this function // should be moved as well eg. SoftmaxMKLDNNHandler -> MKLDNNHandler - std::shared_ptr AcquireSrcMemory(void* ptr) { - return this->AcquireMemory(dims_, platform::MKLDNNGetDataType(), fmt_, - ptr, "@user_src_mem_p"); + std::shared_ptr AcquireSrcMemory(const Tensor* input) { + const T* input_data = input->data(); + return this->AcquireMemoryFromPrimitive(fwd_pd_->src_primitive_desc(), + to_void_cast(input_data), + "@src_mem_p"); } - std::shared_ptr AcquireDstMemory(void* ptr) { - return this->AcquireMemory(dims_, platform::MKLDNNGetDataType(), fmt_, - ptr, "@user_dst_mem_p"); + // TODO(jczaja): Move to MKLDNNHandler as common code + std::shared_ptr AcquireDstMemory(framework::Tensor* output) { + T* ptr = output->mutable_data(place_, + fwd_pd_->dst_primitive_desc().get_size()); + return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_primitive_desc(), ptr, + "@dst_mem_p"); } - std::shared_ptr AcquireDiffDstMemory(void* ptr) { - return this->AcquireMemory(dims_, platform::MKLDNNGetDataType(), - diff_fmt_, ptr, "@user_diff_dst_mem_p"); + std::shared_ptr AcquireDstMemory(const Tensor* output) { + const T* output_data = output->data(); + return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_primitive_desc(), + to_void_cast(output_data), + "@bwd-dst_mem_p"); } - std::shared_ptr AcquireDiffSrcMemory(void* ptr) { - return this->AcquireMemory(dims_, platform::MKLDNNGetDataType(), - diff_fmt_, ptr, "@user_diff_src_mem_p"); + std::shared_ptr AcquireDiffDstMemory(const Tensor* diffdst) { + const T* ptr = diffdst->data(); + return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_dst_primitive_desc(), + to_void_cast(ptr), + "@diff_dst_mem_p"); } - std::shared_ptr AcquireDstMemoryFromPrimitive(void* ptr) { - this->AcquireSoftmaxPrimitiveDescriptor(); - return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_primitive_desc(), ptr, - "@dst_mem_p"); + std::shared_ptr AcquireDiffSrcMemory( + framework::Tensor* diffsrc) { + T* ptr = diffsrc->mutable_data( + place_, bwd_pd_->diff_src_primitive_desc().get_size()); + return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_src_primitive_desc(), + ptr, "@diff_src_mem_p"); } std::shared_ptr AcquireSoftmax( @@ -95,7 +112,6 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { auto softmax_p = std::static_pointer_cast( dev_ctx_.GetBlob(prim_key)); if (softmax_p == nullptr) { - this->AcquireSoftmaxPrimitiveDescriptor(); softmax_p = std::make_shared( *fwd_pd_, *(static_cast(src_memory_p.get())), *(static_cast(dst_memory_p.get()))); @@ -113,20 +129,8 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { auto softmax_bwd_p = std::static_pointer_cast( dev_ctx_.GetBlob(prim_key)); if (softmax_bwd_p == nullptr) { - auto data_softmax_md = - mkldnn::memory::desc(dims_, platform::MKLDNNGetDataType(), fmt_); - auto diff_softmax_md = mkldnn::memory::desc( - dims_, platform::MKLDNNGetDataType(), diff_fmt_); - // TODO(jczaja): Add support for other axes - auto softmax_bwd_desc = softmax_backward::desc( - diff_softmax_md, data_softmax_md, 1 /* dim: C*/); - this->AcquireSoftmaxPrimitiveDescriptor(); - auto softmax_bwd_pd = mkldnn::softmax_backward::primitive_desc( - softmax_bwd_desc, engine_, *fwd_pd_); - softmax_bwd_p = std::make_shared( - softmax_bwd_pd, *dst_memory_p, *diff_dst_memory_p, - *diff_src_memory_p); + *bwd_pd_, *dst_memory_p, *diff_dst_memory_p, *diff_src_memory_p); dev_ctx_.SetBlob(prim_key, softmax_bwd_p); } @@ -134,7 +138,8 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { } protected: - void AcquireSoftmaxPrimitiveDescriptor(void) { + void AcquireSoftmaxPrimitiveDescriptor(const std::vector& dims, + const mkldnn::memory::format fmt) { // Softmax PD has to be passed to Grad op that // may be executed by diffrent thread, hence // for that one we use key that does not contain TID @@ -153,7 +158,7 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { // forward_training // Normalization is made after innermost dimension eg. C out of NC auto md = - mkldnn::memory::desc(dims_, platform::MKLDNNGetDataType(), fmt_); + mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, md, 1 /*dim: C*/); fwd_pd_.reset( @@ -163,11 +168,33 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { } } + void AcquireSoftmaxBackwardPrimitiveDescriptor( + const std::vector& dims, const mkldnn::memory::format fmt, + const mkldnn::memory::format diff_fmt) { + // Fwd_PD_ has to exists when to create BWD_PD_ + PADDLE_ENFORCE_NOT_NULL(fwd_pd_); + const std::string key_bwd_pd = key_ + "@softmax_bwd_pd"; + bwd_pd_ = + std::static_pointer_cast( + dev_ctx_.GetBlob(key_bwd_pd)); + if (bwd_pd_ == nullptr) { + auto data_softmax_md = + mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); + auto diff_softmax_md = mkldnn::memory::desc( + dims, platform::MKLDNNGetDataType(), diff_fmt); + // TODO(jczaja): Add support for other axes + auto backward_desc = softmax_backward::desc( + diff_softmax_md, data_softmax_md, 1 /* dim: C*/); + bwd_pd_.reset(new mkldnn::softmax_backward::primitive_desc( + backward_desc, engine_, *fwd_pd_)); + dev_ctx_.SetBlob(key_bwd_pd, bwd_pd_); + } + } + private: - std::vector dims_; - MKLDNNMemoryFormat fmt_; - MKLDNNMemoryFormat diff_fmt_; + platform::Place place_; std::shared_ptr fwd_pd_; + std::shared_ptr bwd_pd_; }; template @@ -177,44 +204,25 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); auto& dev_ctx = ctx.template device_context(); - auto mkldnn_engine = dev_ctx.GetEngine(); const Tensor* input = ctx.Input("X"); Tensor* output = ctx.Output("Out"); PADDLE_ENFORCE_EQ( input->dims(), output->dims(), "The shape of softmax's input and output must be identical."); - // make sure 'output' holds memory, which will be shared by - // 'flattened_output' later. - output->mutable_data(ctx.GetPlace()); - // flatten input and output to 2-D matrixs auto dims = input->dims(); // input and output share the same shape auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); - framework::Tensor flattened_input; - framework::Tensor flattened_output; - flattened_input.ShareDataWith(*input).Resize(flattened_dims); - flattened_output.ShareDataWith(*output).Resize(flattened_dims); - - const T* input_data = flattened_input.data(); - T* output_data = flattened_output.mutable_data(ctx.GetPlace()); auto src_tz = paddle::framework::vectorize(flattened_dims); auto dst_tz = src_tz; // Same memory descriptor to be used for input and output memory::dims softmax_tz = {src_tz[0], src_tz[1]}; - // Generate keys for storing/retriving primitives for this operator - const std::string key = - platform::GetHash(softmax_tz, ctx.op().Output("Out")); - SoftmaxMKLDNNHandler handler(softmax_tz, MKLDNNMemoryFormat::nc, dev_ctx, - mkldnn_engine, key); - + ctx.GetPlace(), ctx.op().Output("Out")); // Currently only NC data format is supported - auto softmax_src_memory_p = - handler.AcquireSrcMemory(to_void_cast(input_data)); - auto softmax_dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + auto softmax_src_memory_p = handler.AcquireSrcMemory(input); + auto softmax_dst_memory_p = handler.AcquireDstMemory(output); auto softmax_p = handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p); @@ -222,6 +230,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { *(static_cast(softmax_p.get()))}; stream(stream::kind::eager).submit(pipeline).wait(); + T* output_data = output->mutable_data(ctx.GetPlace()); const bool is_test = ctx.Attr("is_test"); if (!is_test) { T threshold = exp(-64); @@ -230,6 +239,10 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { output_data[i] < threshold ? threshold : output_data[i]; } } + + output->set_layout(framework::DataLayout::kMKLDNN); + // Softmax output format is the same as input one + output->set_format(input->format()); } }; @@ -241,7 +254,6 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { "It must use CPUPlace."); auto& dev_ctx = ctx.template device_context(); - auto mkldnn_engine = dev_ctx.GetEngine(); const Tensor* output = ctx.Input("Out"); auto* dout = ctx.template Input(framework::GradVarName("Out")); auto* dx = @@ -251,52 +263,25 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { dout->dims(), dx->dims(), "The shape of softmax_grad's input and output must be identical."); - // make sure 'dx' holds memory, which will be shared by 'flattened_dx' - // later. - dx->template mutable_data(ctx.GetPlace()); - auto dims = dout->dims(); // input and output share the same shape auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1); - framework::Tensor flattened_output; - framework::Tensor flattened_dout; - framework::Tensor flattened_dx; - flattened_output.ShareDataWith(*output).Resize(flattened_dims); - flattened_dout.ShareDataWith(*dout).Resize(flattened_dims); - flattened_dx.ShareDataWith(*dx).Resize(flattened_dims); - - const T* dst_data = flattened_output.data(); - const T* diff_dst_ptr = flattened_dout.template data(); - T* diff_src_ptr = flattened_dx.template mutable_data(ctx.GetPlace()); - auto dst_tz = paddle::framework::vectorize(flattened_dims); - auto src_tz(dst_tz); + std::vector dst_tz = paddle::framework::vectorize(flattened_dims); + std::vector src_tz(dst_tz); // Same memory descriptor to be used for input and output memory::dims softmax_tz = {src_tz[0], src_tz[1]}; - // Currently only supports NC data format - // retrieve eltwise primitive desc from device context - const std::string key = - platform::GetHash(softmax_tz, ctx.op().Input("Out")); - const std::string key_softmax_pd = key + "@softmax_pd"; - - auto softmax_pd = - std::static_pointer_cast( - dev_ctx.GetBlob(key_softmax_pd)); - PADDLE_ENFORCE(softmax_pd != nullptr, - "Fail to find softmax_pd in device context"); // TODO(jczaja): Add layouts support when there is a need to do so // Two dimensional softmax does support NC format // Normalization is made after innermost dimension eg. C out of NC SoftmaxMKLDNNHandler handler(softmax_tz, MKLDNNMemoryFormat::nc, MKLDNNMemoryFormat::nc, dev_ctx, - mkldnn_engine, key); + ctx.GetPlace(), ctx.op().Input("Out")); - auto dst_memory_p = handler.AcquireDstMemory(to_void_cast(dst_data)); - auto diff_dst_memory_p = - handler.AcquireDiffDstMemory(to_void_cast(diff_dst_ptr)); - auto diff_src_memory_p = - handler.AcquireDiffSrcMemory(to_void_cast(diff_src_ptr)); + auto dst_memory_p = handler.AcquireDstMemory(output); + auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout); + auto diff_src_memory_p = handler.AcquireDiffSrcMemory(dx); // Get primitve from device context auto softmax_bwd_p = handler.AcquireSoftmaxBackward( -- GitLab