diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index 1f3e3f5e5f422c813cbf386048e61ba3ab5b8d6c..758a8f4a41f686aa9ad5ee965d0586df5b89476f 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -369,153 +369,191 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { ctx.op().Output("Output")); const std::string key_conv_pd = key + "@conv_pd"; - std::vector pipeline; - bool is_INT8 = ctx.HasInput("Scale_in")? true : false; - if(!is_INT8){ - auto user_src_md = platform::MKLDNNMemDesc( - {src_tz}, platform::MKLDNNGetDataType(), input->format()); - auto user_weights_md = platform::MKLDNNMemDesc( - {weights_tz}, platform::MKLDNNGetDataType(), - (g == 1) ? mkldnn::memory::format::oihw : mkldnn::memory::format::goihw); - - /* create memory descriptor for convolution without specified format - * ('any') which lets a primitive (convolution in this case) choose - * the memory format preferred for best performance - */ - std::string data_format = ctx.Attr("data_format"); - auto chosen_memory_format = - platform::data_format_to_memory_format(data_format); - - auto src_md = platform::MKLDNNMemDesc( - src_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - auto weights_md = platform::MKLDNNMemDesc( - weights_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - std::vector bias_tz; // TODO(mgallus): avoid empty vector creation. - // Currently used whenever bias is != nullptr. - - auto dst_md = platform::MKLDNNMemDesc( - dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - - // create a conv primitive descriptor and save it for usage in backward - std::shared_ptr conv_pd; - - if (bias) { - bias_tz = paddle::framework::vectorize2int(bias->dims()); - auto bias_md = platform::MKLDNNMemDesc( - bias_tz, platform::MKLDNNGetDataType(), memory::format::x); - conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, - strides, paddings, mkldnn_engine, - fuse_relu, fuse_residual_conn); - } else { - conv_pd = - ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings, - mkldnn_engine, fuse_relu, fuse_residual_conn); - } - // Save conv_pd/src_memory/weights_memory for backward pass - dev_ctx.SetBlob(key_conv_pd, conv_pd); - - ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key); - - // create mkldnn memory from input tensors (data/weights) - auto user_src_memory_p = - handler.AcquireSrcMemory(user_src_md, to_void_cast(input_data)); - auto user_weights_memory_p = handler.AcquireWeightsMemory( - user_weights_md, to_void_cast(filter_data)); - - // create reorder primitive if the input format is not the preferred one - auto src_memory_p = - handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); - auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive( - user_weights_memory_p, pipeline, is_test); - - std::shared_ptr dst_memory_p; + + bool need_s8_to_u8 = false; + if (fuse_residual_conn && is_INT8 && fuse_relu) { + need_s8_to_u8 = true; + } + std::shared_ptr conv_p; + std::shared_ptr src_memory_p; + std::shared_ptr dst_memory_p; + std::vector pipeline; + + auto prim_key = key + "@conv_p"; + auto dst_key = key + "@dst_mem_p"; + auto src_key = key + "@src_mem_p"; + conv_p = std::static_pointer_cast(dev_ctx.GetBlob(prim_key)); + src_memory_p = std::static_pointer_cast(dev_ctx.GetBlob(src_key)); + dst_memory_p = std::static_pointer_cast(dev_ctx.GetBlob(dst_key)); + + if (src_memory_p) { + src_memory_p->set_data_handle(to_void_cast(input_data)); + } + + std::shared_ptr conv_pd; + conv_pd = std::static_pointer_cast(dev_ctx.GetBlob(key_conv_pd)); + std::shared_ptr handler; + if(conv_pd){ + handler.reset(new ConvMKLDNNHandler(conv_pd, dev_ctx, mkldnn_engine, key)); + } + if (!is_INT8 && dst_memory_p){ if (fuse_residual_conn) { auto residual_param = ctx.Input("ResidualData"); auto residual_param_data = residual_param->data(); - - PADDLE_ENFORCE( - residual_param_data != nullptr, - "Provide data if you want MKLDNN conv+elementwise_add fusion"); - PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(), - "Output and elementwise parameter need to have the " - "same dimension sizes"); - - if (residual_param->format() != handler.GetDstFormat()) { + if (residual_param->format() != handler->GetDstFormat()) { auto output_data = - output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler.GetDstMemorySize()); + output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); auto residual_data_tz = paddle::framework::vectorize2int(residual_param->dims()); auto residual_data_type = paddle::framework::ToMKLDNNDataType(residual_param->type()); - + auto user_residual_md = platform::MKLDNNMemDesc( residual_data_tz, residual_data_type, residual_param->format()); - auto user_residual_memory_p = handler.AcquireResidualDataMemory( + auto user_residual_memory_p = handler->AcquireResidualDataMemory( user_residual_md, to_void_cast(residual_param_data)); - - dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory( + + dst_memory_p = handler->AcquireDstMemoryFromResidualDataMemory( user_residual_memory_p, to_void_cast(output_data), pipeline); } else { output->ShareDataWith(*residual_param); auto output_data = output->mutable_data(ctx.GetPlace()); - dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + dst_memory_p->set_data_handle(to_void_cast(output_data)); } } else { auto output_data = - output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler.GetDstMemorySize()); - dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); + dst_memory_p->set_data_handle(to_void_cast(output_data)); } + } - // create convolution op primitive - std::shared_ptr conv_p; - if (bias) { - const T* bias_data = bias->data(); - auto user_bias_md = platform::MKLDNNMemDesc( - {bias_tz}, platform::MKLDNNGetDataType(), memory::format::x); - auto user_bias_memory_p = - handler.AcquireBiasMemory(user_bias_md, to_void_cast(bias_data)); - - auto bias_memory_p = - handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline); - conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, - bias_memory_p, dst_memory_p); - } else { - conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, - dst_memory_p); - } + if(!is_INT8){ + if(conv_p == nullptr){ + auto user_src_md = platform::MKLDNNMemDesc( + {src_tz}, platform::MKLDNNGetDataType(), input->format()); + auto user_weights_md = platform::MKLDNNMemDesc( + {weights_tz}, platform::MKLDNNGetDataType(), + (g == 1) ? mkldnn::memory::format::oihw : mkldnn::memory::format::goihw); - // push primitive to stream and wait until it's executed - pipeline.push_back(*conv_p); - stream(stream::kind::eager).submit(pipeline).wait(); + /* create memory descriptor for convolution without specified format + * ('any') which lets a primitive (convolution in this case) choose + * the memory format preferred for best performance + */ + std::string data_format = ctx.Attr("data_format"); + auto chosen_memory_format = + platform::data_format_to_memory_format(data_format); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(GetMKLDNNFormat(*dst_memory_p)); + auto src_md = platform::MKLDNNMemDesc( + src_tz, platform::MKLDNNGetDataType(), chosen_memory_format); + auto weights_md = platform::MKLDNNMemDesc( + weights_tz, platform::MKLDNNGetDataType(), chosen_memory_format); + std::vector bias_tz; // TODO(mgallus): avoid empty vector creation. + // Currently used whenever bias is != nullptr. - } else{ - bool need_s8_to_u8 = false; - if (fuse_residual_conn && fuse_relu) { - need_s8_to_u8 = true; - } - std::shared_ptr conv_p; - std::shared_ptr src_memory_p; - std::shared_ptr dst_memory_p; - std::vector pipeline; - - auto prim_key = key + "@conv_p"; - auto dst_key = key + "@dst_mem_p"; - auto src_key = key + "@src_mem_p"; - conv_p = std::static_pointer_cast(dev_ctx.GetBlob(prim_key)); - src_memory_p = std::static_pointer_cast(dev_ctx.GetBlob(src_key)); - dst_memory_p = std::static_pointer_cast(dev_ctx.GetBlob(dst_key)); - - if (src_memory_p) { - src_memory_p->set_data_handle(to_void_cast(input_data)); + auto dst_md = platform::MKLDNNMemDesc( + dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); + + // create a conv primitive descriptor and save it for usage in backward + if (bias) { + bias_tz = paddle::framework::vectorize2int(bias->dims()); + auto bias_md = platform::MKLDNNMemDesc( + bias_tz, platform::MKLDNNGetDataType(), memory::format::x); + conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, + strides, paddings, mkldnn_engine, + fuse_relu, fuse_residual_conn); + } else { + conv_pd = + ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings, + mkldnn_engine, fuse_relu, fuse_residual_conn); + } + // Save conv_pd/src_memory/weights_memory for backward pass + dev_ctx.SetBlob(key_conv_pd, conv_pd); + + handler.reset(new ConvMKLDNNHandler(conv_pd, dev_ctx, mkldnn_engine, key)); + + // create mkldnn memory from input tensors (data/weights) + auto user_src_memory_p = + handler->AcquireSrcMemory(user_src_md, to_void_cast(input_data)); + auto user_weights_memory_p = handler->AcquireWeightsMemory( + user_weights_md, to_void_cast(filter_data)); + + // create reorder primitive if the input format is not the preferred one + src_memory_p = + handler->AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); + auto weights_memory_p = handler->AcquireWeightsMemoryFromPrimitive( + user_weights_memory_p, pipeline, is_test); + + if (fuse_residual_conn) { + auto residual_param = ctx.Input("ResidualData"); + auto residual_param_data = residual_param->data(); + + PADDLE_ENFORCE( + residual_param_data != nullptr, + "Provide data if you want MKLDNN conv+elementwise_add fusion"); + PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(), + "Output and elementwise parameter need to have the " + "same dimension sizes"); + + if (residual_param->format() != handler->GetDstFormat()) { + auto output_data = + output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); + auto residual_data_tz = + paddle::framework::vectorize2int(residual_param->dims()); + auto residual_data_type = + paddle::framework::ToMKLDNNDataType(residual_param->type()); + + auto user_residual_md = platform::MKLDNNMemDesc( + residual_data_tz, residual_data_type, residual_param->format()); + auto user_residual_memory_p = handler->AcquireResidualDataMemory( + user_residual_md, to_void_cast(residual_param_data)); + + dst_memory_p = handler->AcquireDstMemoryFromResidualDataMemory( + user_residual_memory_p, to_void_cast(output_data), pipeline); + } else { + output->ShareDataWith(*residual_param); + auto output_data = output->mutable_data(ctx.GetPlace()); + dst_memory_p = + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + } + } else { + auto output_data = + output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); + dst_memory_p = + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + } + + // create convolution op primitive + if (bias) { + const T* bias_data = bias->data(); + auto user_bias_md = platform::MKLDNNMemDesc( + {bias_tz}, platform::MKLDNNGetDataType(), memory::format::x); + auto user_bias_memory_p = + handler->AcquireBiasMemory(user_bias_md, to_void_cast(bias_data)); + + auto bias_memory_p = + handler->AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_test); + conv_p = handler->AcquireConvolution(src_memory_p, weights_memory_p, + bias_memory_p, dst_memory_p); + } else { + conv_p = handler->AcquireConvolution(src_memory_p, weights_memory_p, + dst_memory_p); + } + // push primitive to stream and wait until it's executed + pipeline.push_back(*conv_p); + stream(stream::kind::eager).submit(pipeline).wait(); + + output->set_layout(DataLayout::kMKLDNN); + output->set_format(GetMKLDNNFormat(*dst_memory_p)); + } else { + pipeline.push_back(*conv_p); + stream(stream::kind::eager).submit(pipeline).wait(); + + output->set_layout(DataLayout::kMKLDNN); + output->set_format(GetMKLDNNFormat(*dst_memory_p)); } - + } else{ if(conv_p == nullptr){ auto* scale_in = ctx.HasInput("Scale_in") ? ctx.Input("Scale_in") : nullptr; auto* scale_in_eltwise = ctx.HasInput("Scale_in_eltwise")? ctx.Input("Scale_in_eltwise") : nullptr; @@ -621,8 +659,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto dst_md = platform::MKLDNNMemDesc(dst_tz, dst_dt, chosen_memory_format); // create a conv primitive descriptor and save it for usage in backward - std::shared_ptr conv_pd; - if (bias) { auto bias_md = platform::MKLDNNMemDesc( bias_tz, memory::data_type::s32, memory::format::x); @@ -639,21 +675,21 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { // Save conv_pd/src_memory/weights_memory for backward pass dev_ctx.SetBlob(key_conv_pd, conv_pd); - ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key); + handler.reset(new ConvMKLDNNHandler(conv_pd, dev_ctx, mkldnn_engine, key)); // create mkldnn memory from input tensors (data/weights) auto user_src_memory_p = - handler.AcquireSrcMemory(user_src_md, to_void_cast(input_data)); - auto user_weights_memory_p = handler.AcquireWeightsMemory( + handler->AcquireSrcMemory(user_src_md, to_void_cast(input_data)); + auto user_weights_memory_p = handler->AcquireWeightsMemory( user_weights_md, to_void_cast(filter_data)); // create reorder primitive if the input format is not the preferred one src_memory_p = - handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); + handler->AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); std::shared_ptr weights_memory_p; int mask_reorder = is_multi_channel? ((g!= 1) ? (1<<1)+(1<<0) : 1<<0) : 0; - weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive( + weights_memory_p = handler->AcquireWeightsMemoryFromPrimitive( user_weights_memory_p, pipeline, is_test, is_INT8, scale_weights_data, mask_reorder); if(fuse_residual_conn) { @@ -662,27 +698,27 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { "Output and elementwise parameter need to have the " "same dimension sizes"); auto residual_dt = paddle::framework::ToMKLDNNDataType(residual_param->type()); - PADDLE_ENFORCE_EQ(residual_param->format(), handler.GetDstFormat(), + PADDLE_ENFORCE_EQ(residual_param->format(), handler->GetDstFormat(), "Conv input dimension and filter dimension should be the same."); output->ShareDataWith(*residual_param); if(residual_dt == mkldnn::memory::data_type::u8){ uint8_t* output_data = output->mutable_data(ctx.GetPlace()); dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } else{ int8_t* output_data = output->mutable_data(ctx.GetPlace()); dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } } else { if(fuse_relu){ - uint8_t* output_data = output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler.GetDstMemorySize()); + uint8_t* output_data = output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } else{ - int8_t* output_data = output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler.GetDstMemorySize()); + int8_t* output_data = output->mutable_data(ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, handler->GetDstMemorySize()); dst_memory_p = - handler.AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); + handler->AcquireDstMemoryFromPrimitive(to_void_cast(output_data)); } } @@ -694,7 +730,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto user_bias_md = platform::MKLDNNMemDesc( {bias_tz}, platform::MKLDNNGetDataType(), memory::format::x); auto user_bias_memory_p = - handler.AcquireBiasMemory(user_bias_md, to_void_cast(bias_data)); + handler->AcquireBiasMemory(user_bias_md, to_void_cast(bias_data)); std::shared_ptr bias_memory_p; int mask_reorder = is_multi_channel? 1<<0 : 1; if(!scale_reuse){ @@ -709,11 +745,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { scale_bias_data = GetScaleMap(scale_map, scale_bias_key); } bias_memory_p = - handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_test, is_INT8, scale_bias_data, mask_reorder); - conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, + handler->AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_test, is_INT8, scale_bias_data, mask_reorder); + conv_p = handler->AcquireConvolution(src_memory_p, weights_memory_p, bias_memory_p, dst_memory_p); } else { - conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, + conv_p = handler->AcquireConvolution(src_memory_p, weights_memory_p, dst_memory_p); } @@ -735,7 +771,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { if (need_s8_to_u8) { output->mutable_data(ctx.GetPlace()); } - + output->set_layout(DataLayout::kMKLDNN); output->set_format(GetMKLDNNFormat(*dst_memory_p)); }