diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc
index 8abfb39a295c5880fbbc03ca7bec3c53b39143c8..64be8a4b16314d58fb84b734088b03c61d1e79d0 100644
--- a/paddle/fluid/operators/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/conv_mkldnn_op.cc
@@ -14,6 +14,7 @@
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/framework/data_layout_transform.h"
 
 namespace paddle {
 namespace operators {
@@ -274,7 +275,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   void Compute(const paddle::framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
-
+std::cout<<"this is conv kernel op....................."<<std::endl;
     const bool is_test = ctx.Attr<bool>("is_test");
 
     auto& dev_ctx =
@@ -286,11 +287,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
     auto* output = ctx.Output<Tensor>("Output");
 
-    bool is_INT8 = ctx.HasInput("Scale_in")? true : false;
     auto* scale_in = ctx.HasInput("Scale_in") ? ctx.Input<Tensor>("Scale_in") : nullptr;
     auto* scale_in_eltwise = ctx.HasInput("Scale_in_eltwise")? ctx.Input<Tensor>("Scale_in_eltwise") : nullptr;
     auto* scale_weights = ctx.HasInput("Scale_weights")? ctx.Input<Tensor>("Scale_weights") : nullptr;
     auto* scale_out = ctx.HasInput("Scale_out")? ctx.Input<Tensor>("Scale_out") : nullptr;
+
+    bool is_INT8 = ctx.HasInput("Scale_in")? true : false;
     bool is_multi_channel = (is_INT8 && scale_weights->memory_size() > 1) ? true : false;
 
     PADDLE_ENFORCE(input->layout() == DataLayout::kMKLDNN &&
@@ -318,13 +320,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
     int groups = ctx.Attr<int>("groups");
 
+std::cout<<"fuse_relu = "<<fuse_relu<<std::endl;
+
     const T* input_data = input->data<T>();
-    const T* filter_data = filter->data<T>();
+    const float* filter_data = filter->data<float>();
 
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> weights_tz =
@@ -344,17 +348,17 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     }
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
 
-    std::vector<T> output_shift_scale;
-    T sum_scale = 1.0f;
+    std::vector<float> output_shift_scale;
+    float sum_scale = 1.0f;
     if(is_INT8){
+std::cout<<"this is conv int8 op .............."<<std::endl;
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
-      T scale_in_data = *(scale_in->data<T>());
-      T scale_in_eltwise_data = *(scale_in_eltwise->data<T>());
-      std::vector<T> scale_weights_data(count);
+      float scale_in_data = *(scale_in->data<float>());
+      std::vector<float> scale_weights_data(count);
       for(int i=0; i<count; i++){
-        scale_weights_data[i] = *(scale_weights->data<T>() + i);
+        scale_weights_data[i] = *(scale_weights->data<float>() + i);
       }
-      T scale_out_data = *(scale_out->data<T>());
+      float scale_out_data = *(scale_out->data<float>());
 
       output_shift_scale.resize(count);
       for(int i=0; i<count; i++){
@@ -362,8 +366,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         else
           output_shift_scale[i] = scale_out_data / (scale_in_data * scale_weights_data[i]);
       }
-
-      sum_scale = scale_out_data / scale_in_eltwise_data;
+      if(fuse_residual_conn){
+        float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
+        sum_scale = scale_out_data / scale_in_eltwise_data;
+      }
     }
 
     // Get unique name for storing MKLDNN primitives
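The requantization arithmetic in the hunk above is easier to check in isolation. Below is a minimal standalone sketch (not part of the patch; all concrete scale values are invented) of how output_shift_scale and sum_scale fall out of the input, weight, residual, and output scales:

    // Standalone sketch of the scale math used by the INT8 conv kernel above.
    #include <iostream>
    #include <vector>

    int main() {
      float scale_in = 127.0f / 5.0f;                        // u8 input covering fp32 [0, 5]
      std::vector<float> scale_weights = {254.0f, 1587.5f};  // one scale per output channel
      float scale_out = 127.0f / 10.0f;
      float scale_in_eltwise = 127.0f / 8.0f;                // scale of the fused residual input

      // The s32 accumulator holds (x*scale_in)*(w*scale_w); multiplying by
      // scale_out / (scale_in * scale_w) requantizes it to the output scale.
      std::vector<float> output_shift_scale(scale_weights.size());
      for (size_t i = 0; i < scale_weights.size(); ++i)
        output_shift_scale[i] = scale_out / (scale_in * scale_weights[i]);

      // The residual tensor arrives at its own scale, so the sum post-op
      // rescales it to the output scale before adding.
      float sum_scale = scale_out / scale_in_eltwise;

      for (float s : output_shift_scale) std::cout << s << ' ';
      std::cout << "| sum_scale = " << sum_scale << '\n';
    }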
@@ -373,59 +379,76 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                                            ctx.op().Output("Output"));
     const std::string key_conv_pd = key + "@conv_pd";
 
-    std::vector<primitive> pipeline;
+std::cout<<"..."<<std::endl;
+    std::vector<primitive> pipeline;
 
     auto user_src_md = platform::MKLDNNMemDesc(
-        {src_tz}, platform::MKLDNNGetDataType<T>(), input->format());
+        {src_tz}, paddle::framework::ToMKLDNNDataType(input->type()), input->format());
     auto user_weights_md = platform::MKLDNNMemDesc(
-        {weights_tz}, platform::MKLDNNGetDataType<T>(),
-        (g == 1) ? filter->format() : mkldnn::memory::format::goihw);
+        {weights_tz}, platform::MKLDNNGetDataType<float>(),
+        (g == 1) ? mkldnn::memory::format::oihw : mkldnn::memory::format::goihw);
 
     /* create memory descriptor for convolution without specified format
      * ('any') which lets a primitive (convolution in this case) choose
      * the memory format preferred for best performance
      */
     std::string data_format = ctx.Attr<std::string>("data_format");
-    auto chosen_memory_format = 
+    auto chosen_memory_format =
         platform::data_format_to_memory_format(data_format);
 
     auto src_md = platform::MKLDNNMemDesc(
-        src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+        src_tz, platform::MKLDNNGetDataType<float>(), chosen_memory_format);
     auto weights_md = platform::MKLDNNMemDesc(
-        weights_tz, platform::MKLDNNGetDataType<T>(),
+        weights_tz, platform::MKLDNNGetDataType<float>(),
         (g == 1) ? chosen_memory_format : mkldnn::memory::format::goihw);
-    std::vector<int> bias_tz;  // TODO(mgallus): avoid empty vector creation.
-                               // Currently used whenever bias is != nullptr.
     auto dst_md = platform::MKLDNNMemDesc(
-        dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+        dst_tz, platform::MKLDNNGetDataType<float>(), chosen_memory_format);
+    std::vector<int> bias_tz;
+
+    if(is_INT8){
+      src_md = platform::MKLDNNMemDesc(
+          src_tz, memory::data_type::u8, chosen_memory_format);
+      weights_md = platform::MKLDNNMemDesc(
+          weights_tz, memory::data_type::s8,
+          (g == 1) ? chosen_memory_format : mkldnn::memory::format::goihw);
+      dst_md = platform::MKLDNNMemDesc(
+          dst_tz,
+          fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
+                     paddle::framework::ToMKLDNNDataType(std::type_index(typeid(char))),
+          chosen_memory_format);
+    }
 
     // create a conv primitive descriptor and save it for usage in backward
     std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd;
     if (bias) {
-      bias_tz = paddle::framework::vectorize2int(bias->dims());
-      auto bias_md = platform::MKLDNNMemDesc(
-          bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
-      if(is_INT8){
-        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
-                                       strides, paddings, mkldnn_engine,
-                                       fuse_relu, fuse_residual_conn,
-                                       output_shift_scale, sum_scale);
-      } else{
-        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
-                                       strides, paddings, mkldnn_engine,
-                                       fuse_relu, fuse_residual_conn);
-      }
+      bias_tz = paddle::framework::vectorize2int(bias->dims());
+      auto bias_md = platform::MKLDNNMemDesc(
+          bias_tz, platform::MKLDNNGetDataType<float>(), memory::format::x);
+      if(is_INT8){
+        bias_md = platform::MKLDNNMemDesc(
+            bias_tz, memory::data_type::s32, memory::format::x);
+      }
+      if(is_INT8){
+        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
+                                       strides, paddings, mkldnn_engine,
+                                       fuse_relu, fuse_residual_conn,
+                                       output_shift_scale, sum_scale);
+      } else{
+        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
+                                       strides, paddings, mkldnn_engine,
+                                       fuse_relu, fuse_residual_conn);
+      }
     } else {
-      if(is_INT8){
-        conv_pd =
-            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
-                                 mkldnn_engine, fuse_relu, fuse_residual_conn,
-                                 output_shift_scale, sum_scale);
-      } else{
-        conv_pd =
-            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
-                                 mkldnn_engine, fuse_relu, fuse_residual_conn);
-      }
+      if(is_INT8){
+        conv_pd =
+            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
+                                 mkldnn_engine, fuse_relu, fuse_residual_conn,
+                                 output_shift_scale, sum_scale);
+      } else{
+        conv_pd =
+            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
+                                 mkldnn_engine, fuse_relu, fuse_residual_conn);
+      }
     }
     // Save conv_pd/src_memory/weights_memory for backward pass
     dev_ctx.SetBlob(key_conv_pd, conv_pd);
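For reference, the data-type choices the is_INT8 branch above pins down can be summarized in a few lines. The sketch below is illustrative only: the dt enum and pick_int8_dtypes are invented names that mirror mkldnn::memory::data_type, not patch code:

    // Sketch of the memory data types the is_INT8 branch above selects.
    #include <cstdio>

    enum class dt { f32, s32, s8, u8 };
    struct conv_dtypes { dt src, weights, bias, dst; };

    conv_dtypes pick_int8_dtypes(bool fuse_relu) {
      // u8 activations, s8 weights, s32 bias/accumulator; the destination
      // stays u8 only when a fused ReLU guarantees non-negative values,
      // otherwise it must be signed s8.
      return {dt::u8, dt::s8, dt::s32, fuse_relu ? dt::u8 : dt::s8};
    }

    int main() {
      conv_dtypes t = pick_int8_dtypes(true);
      std::printf("dst is %s\n", t.dst == dt::u8 ? "u8" : "s8");
    }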
@@ -436,66 +459,104 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto user_src_memory_p =
         handler.AcquireSrcMemory(user_src_md, to_void_cast<T>(input_data));
     auto user_weights_memory_p = handler.AcquireWeightsMemory(
-        user_weights_md, to_void_cast<T>(filter_data));
-
-    T* output_data = nullptr;
-
-    if (fuse_residual_conn) {
-      auto residual_param = ctx.Input<Tensor>("ResidualData");
-      auto residual_param_data = residual_param->data<T>();
-
-      PADDLE_ENFORCE(
-          residual_param_data != nullptr,
-          "Provide data if you want MKLDNN conv+elementwise_add fusion");
-      PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
-                        "Output and elementwise parameter need to have the "
-                        "same dimension sizes");
-
-      output->ShareDataWith(*residual_param);
-      output_data = output->mutable_data<T>(ctx.GetPlace());
-    } else {
-      output_data =
-          output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
-    }
+        user_weights_md, to_void_cast<float>(filter_data));
 
     // create reorder primitive if the input format is not the preferred one
     auto src_memory_p =
         handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
-    auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
-        user_weights_memory_p, pipeline, is_test);
+    std::shared_ptr<mkldnn::memory> weights_memory_p;// = handler.AcquireWeightsMemoryFromPrimitive(
+        //user_weights_memory_p, pipeline, is_test);
     if(is_INT8){
       int mask_reorder = is_multi_channel? 0 : ((g!= 1) ? (1<<1)+(1<<0) : 1<<0);
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
-      std::vector<T> scale_weights_data(count);
+      std::vector<float> scale_weights_data(count);
       for(int i=0; i<count; i++){
-        scale_weights_data[i] = *(scale_weights->data<T>() + i);
+        scale_weights_data[i] = *(scale_weights->data<float>() + i);
       }
-      auto weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
+      weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
           user_weights_memory_p, pipeline, is_test, is_INT8, scale_weights_data, mask_reorder);
+    } else{
+      weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive(
+          user_weights_memory_p, pipeline, is_test);
+    }
+
+    std::shared_ptr<mkldnn::memory> dst_memory_p;
+    if(is_INT8){
+      if (fuse_residual_conn) {
+        auto residual_param = ctx.Input<Tensor>("ResidualData");
+        PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
+                          "Output and elementwise parameter need to have the "
+                          "same dimension sizes");
+
+        output->ShareDataWith(*residual_param);
+        if(fuse_relu){
+          uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+          dst_memory_p =
+              handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
+        } else{
+          int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
+          dst_memory_p =
+              handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
+        }
+      } else {
+        if(fuse_relu){
+          uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace(), handler.GetDstMemorySize());
+          dst_memory_p =
+              handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
+        } else{
+          int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace(), handler.GetDstMemorySize());
+          dst_memory_p =
+              handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
+        }
+      }
+std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<" dst fmt = "<<dst_memory_p->get_primitive_desc().desc().data.format<<std::endl;
+    } else{
+      T* output_data = nullptr;
+      if (fuse_residual_conn) {
+        auto residual_param = ctx.Input<Tensor>("ResidualData");
+        auto residual_param_data = residual_param->data<T>();
+
+        PADDLE_ENFORCE(
+            residual_param_data != nullptr,
+            "Provide data if you want MKLDNN conv+elementwise_add fusion");
+        PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
+                          "Output and elementwise parameter need to have the "
+                          "same dimension sizes");
+
+        output->ShareDataWith(*residual_param);
+        output_data = output->mutable_data<T>(ctx.GetPlace());
+      } else {
+        output_data =
+            output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
+      }
+      dst_memory_p =
+          handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
     }
-    auto dst_memory_p =
-        handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
 
     // create convolution op primitive
     std::shared_ptr<mkldnn::convolution_forward> conv_p;
     if (bias) {
-      const T* bias_data = bias->data<T>();
+      const float* bias_data = bias->data<float>();
       auto user_bias_md = platform::MKLDNNMemDesc(
-          {bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
+          {bias_tz}, platform::MKLDNNGetDataType<float>(), memory::format::x);
       auto user_bias_memory_p =
-          handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));
-      auto bias_memory_p =
-          handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
+          handler.AcquireBiasMemory(user_bias_md, to_void_cast<float>(bias_data));
+      std::shared_ptr<mkldnn::memory> bias_memory_p;// =
+          //handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
       if(is_INT8){
         int mask_reorder = is_multi_channel? 0 : 1<<0;
         int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
-        std::vector<T> scale_bias_data(count);
+        std::vector<float> scale_bias_data(count);
        for(int i=0; i<count; i++){
-          scale_bias_data[i] = (*scale_in->data<T>()) * (*(scale_weights->data<T>() + i));
+          scale_bias_data[i] = (*scale_in->data<float>()) * (*(scale_weights->data<float>() + i));
         }
-        auto bias_memory_p =
+        bias_memory_p =
             handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_INT8, scale_bias_data, mask_reorder);
-      }
+      } else{
+        bias_memory_p =
+            handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
+      }
       conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
                                           bias_memory_p, dst_memory_p);
     } else {
@@ -503,17 +564,21 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                                           dst_memory_p);
     }
 
+    // push primitive to stream and wait until it's executed
     pipeline.push_back(*conv_p);
     stream(stream::kind::eager).submit(pipeline).wait();
 
     output->set_layout(DataLayout::kMKLDNN);
     output->set_format(GetMKLDNNFormat(*dst_memory_p));
+
+std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<" dst fmt = "<<dst_memory_p->get_primitive_desc().desc().data.format<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<" dst dt = "<<dst_memory_p->get_primitive_desc().desc().data.data_type<<std::endl;
   }
 
  private:
   mkldnn::primitive_attr CreatePostOps(bool fuse_relu, bool fuse_residual_conn,
-                       const std::vector<T> output_shift_scale, T sum_scale) const {
+                       const std::vector<float> output_shift_scale, float sum_scale) const {
     mkldnn::primitive_attr conv_attr;
     mkldnn::post_ops post_operations;
     // Fusion with Elementwise layer relies on adding a sum post-operation with
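The mask_reorder values used for the weights and bias reorders above follow MKL-DNN's convention that each set bit of a scales mask selects a tensor dimension with its own scale (mask 0 means a single scale for the whole tensor). A plain-C++ sketch of per-output-channel weight quantization, with invented weights and scales:

    // Per-output-channel weight quantization, matching a scales mask whose
    // bit 0 is set (one scale per dimension-0 slice).
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      float w[2][3] = {{0.5f, -0.25f, 0.125f}, {0.02f, -0.04f, 0.08f}};
      std::vector<float> scale = {127.0f / 0.5f, 127.0f / 0.08f};  // per channel: 127/max|w|

      int8_t q[2][3];
      for (int o = 0; o < 2; ++o)
        for (int i = 0; i < 3; ++i)
          q[o][i] = static_cast<int8_t>(std::round(w[o][i] * scale[o]));

      // mask 0 would instead apply one scale to the whole tensor
      std::printf("q[0][0]=%d q[1][2]=%d\n", q[0][0], q[1][2]);
    }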
@@ -568,7 +633,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                        const std::vector<int>& paddings, const mkldnn::engine& engine,
                        const bool fuse_relu, const bool fuse_residual_conn,
-                       const std::vector<T> output_shift_scale, const T sum_scale) const {
+                       const std::vector<float> output_shift_scale, const float sum_scale) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
 
@@ -617,7 +682,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                        const std::vector<int>& paddings, const mkldnn::engine& engine,
                        const bool fuse_relu, const bool fuse_residual_conn,
-                       const std::vector<T> output_shift_scale, const T sum_scale) const {
+                       const std::vector<float> output_shift_scale, const float sum_scale) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
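The output_shift_scale and sum_scale parameters threaded through CreatePostOps and the ConvFwdPrimitiveDesc overloads above amount to one scalar recipe per output element. A self-contained sketch of that recipe; int8_conv_postops is an invented helper, and the ordering (output scale, then sum, then relu) is the assumed fusion order:

    // Scalar recipe for one INT8 output element.
    #include <algorithm>
    #include <cstdio>

    float int8_conv_postops(int acc_s32, float output_shift_scale,
                            float sum_scale, float prev_dst, bool fuse_relu) {
      float v = acc_s32 * output_shift_scale;  // requantize the s32 accumulator
      v += sum_scale * prev_dst;               // sum post-op: fused residual add
      if (fuse_relu) v = std::max(v, 0.0f);    // eltwise post-op: fused activation
      return v;
    }

    int main() {
      // -1000 * 0.01 + 0.5 * 30 = 5, and relu keeps it
      std::printf("%.1f\n", int8_conv_postops(-1000, 0.01f, 0.5f, 30.0f, true));
    }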
@@ -841,7 +906,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 namespace ops = paddle::operators;
 
 REGISTER_OP_KERNEL(conv2d, MKLDNN, ::paddle::platform::CPUPlace,
-                   ops::ConvMKLDNNOpKernel<float>);
+                   ops::ConvMKLDNNOpKernel<float>,
+                   ops::ConvMKLDNNOpKernel<uint8_t>);
 
 REGISTER_OP_KERNEL(conv2d_grad, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::ConvMKLDNNGradOpKernel<float>);
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 068ec7e46c0b213b0ba93ec2130ee74815c0d298..8a9253dea923af73492e09959e397270159f5eb2 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -94,10 +94,10 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
 
   auto input_data_type =
       framework::ToDataType(ctx.Input<Tensor>("Input")->type());
-  auto filter_data_type =
-      framework::ToDataType(ctx.Input<Tensor>("Filter")->type());
-  PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
-                    "input and filter data type should be consistent");
+  //auto filter_data_type =
+  //    framework::ToDataType(ctx.Input<Tensor>("Filter")->type());
+  //PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
+  //                  "input and filter data type should be consistent");
 
   if (input_data_type == framework::proto::VarType::FP16) {
     PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN,
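The conv2d registration above now lists kernels for more than one element type, and conv_op.cc stops insisting that input and filter share a dtype, since the INT8 path legitimately pairs u8 activations with s8 weights and keys kernel choice off the input type alone. A toy sketch of that type-keyed dispatch idea (the registry map and conv_kernel are invented stand-ins, not Paddle framework APIs):

    #include <cstdint>
    #include <cstdio>
    #include <functional>
    #include <map>
    #include <string>

    template <typename T>
    void conv_kernel() { std::printf("conv kernel, element size %zu\n", sizeof(T)); }

    int main() {
      std::map<std::string, std::function<void()>> registry;
      registry["float"] = conv_kernel<float>;    // FP32 path
      registry["uint8"] = conv_kernel<uint8_t>;  // INT8 path (u8 activations)
      registry["uint8"]();                       // keyed off the input tensor's dtype
    }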
diff --git a/paddle/fluid/operators/dequantize_op.cc b/paddle/fluid/operators/dequantize_op.cc
index 07a20bfae54745da8f4bd5eed6d036dc33dc3a72..5fe6010dba8a10846f9e5b2f6a8ba239b77839c1 100644
--- a/paddle/fluid/operators/dequantize_op.cc
+++ b/paddle/fluid/operators/dequantize_op.cc
@@ -40,15 +40,15 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-
+std::cout<<"this is dequant op ***********"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
     const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    float* output_data = output->mutable_data<float>(ctx.GetPlace());
     //T scale_data = *(scale->data<T>());
-    std::vector<T> scale_data = {*(scale->data<T>())};
+    std::vector<float> scale_data = {*(scale->data<float>())};
     std::vector<primitive> pipeline;
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
@@ -69,7 +69,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     auto dst_md = platform::MKLDNNMemDesc(
         {dst_tz}, memory::data_type::f32, memory::format::nchw);
     auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
-    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<T>(output_data));
+    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));
 
     auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
         new reorder::primitive_desc(dst_pd, src_pd, attri));
@@ -112,5 +112,5 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 
-REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace, ops::DeQuantOpKernel<float>);
+REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace, ops::DeQuantOpKernel<uint8_t>);
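Assuming the quantization convention used elsewhere in this patch (q = round(x * scale)), the dequantize reorder above reconstructs x ≈ q / scale. A standalone sketch with invented values:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      float scale = 127.0f / 5.0f;  // s8 data covering fp32 [-5, 5]
      std::vector<int8_t> q = {-127, 0, 25, 127};
      for (int8_t v : q) std::printf("%4d -> %f\n", v, v / scale);
    }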
diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc
index 56cef91e29cc7da27384c27a7ec63e90cfadfc3b..d8799287ee197dd9a2b1f3eee7c16e5f4e3ec10f 100644
--- a/paddle/fluid/operators/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/pool_mkldnn_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/pool_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/framework/data_layout_transform.h"
 
 namespace paddle {
 namespace operators {
@@ -71,7 +72,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   void Compute(const paddle::framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
-
+std::cout<<"this is pool op"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();
@@ -129,14 +130,19 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
                         padding_right_bottom);
     }
+
+    mkldnn::memory::data_type dt = paddle::framework::ToMKLDNNDataType(input->type());
+
+std::cout<<"input type = "<<dt<<std::endl;
+
     auto src_md = platform::MKLDNNMemDesc(
-        src_tz, platform::MKLDNNGetDataType<T>(), input_format);
+        src_tz, dt, input_format);
 
     /* create memory descriptor for pooling without specified format
      * ('any') which lets a primitive (pooling in this case) choose
      * the memory format preferred for best performance
      */
-    auto dst_md = platform::MKLDNNMemDesc(dst_tz, mkldnn::memory::f32,
+    auto dst_md = platform::MKLDNNMemDesc(dst_tz, dt,
                                           mkldnn::memory::format::any);
 
     std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
@@ -399,6 +405,9 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 namespace ops = paddle::operators;
 
 REGISTER_OP_KERNEL(pool2d, MKLDNN, ::paddle::platform::CPUPlace,
-                   ops::PoolMKLDNNOpKernel<float>);
+                   ops::PoolMKLDNNOpKernel<float>,
+                   ops::PoolMKLDNNOpKernel<int8_t>,
+                   ops::PoolMKLDNNOpKernel<uint8_t>);
+
 REGISTER_OP_KERNEL(pool2d_grad, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::PoolMKLDNNGradOpKernel<float>);
diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc
index a18c6f74137bee726fe04cd069d4a293bedfaa60..cb5a9e4c1c8d6f94e8dce9eb753fb3763625baa4 100644
--- a/paddle/fluid/operators/quantize_op.cc
+++ b/paddle/fluid/operators/quantize_op.cc
@@ -37,7 +37,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-
+std::cout<<"this is quantize op!!!!!!!!!!!!!!"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -68,7 +68,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
     auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
         new reorder::primitive_desc(dst_pd, src_pd, attri));
     auto reorder_p = std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
+
     pipeline.push_back(*reorder_p);
+    stream(stream::kind::eager).submit(pipeline).wait();
+
+    output->set_layout(DataLayout::kMKLDNN);
+    output->set_format(GetMKLDNNFormat(dst_memory));
   }
 };
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index c5026194e0a162336797e56efa0f461620ed958c..6f2814df1ade3f2cb2f0a38de7ed43d52f682ae9 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -70,6 +70,7 @@ inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector<int>& dims,
                                           mkldnn::memory::data_type data_type,
                                           mkldnn::memory::format format) {
   mkldnn::memory::dims tz = dims;
+std::cout<<"this is MKLDNNMemDesc"<<" data_type"<<data_type<<std::endl;
   return mkldnn::memory::desc({tz}, data_type, format);
 }
@@ -191,8 +192,9 @@ class MKLDNNHandler {
     auto mem_p =
         std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
     PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
                    "Fail to find mem primitive in device context");
+    //mem_p = nullptr;
     if (mem_p == nullptr) {
       mem_p = std::make_shared<mkldnn::memory>(mdp, ptr);
       dev_ctx_.SetBlob(local_key, mem_p);
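Taken together, quantize_op, the INT8 conv kernel, and dequantize_op implement a quantize -> int8 compute -> dequantize round trip. A closing scalar sketch of that loop and the rounding error it accepts (all values invented):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      float x = 3.14159f;
      float scale = 127.0f / 5.0f;  // u8 covering fp32 [0, 5]
      uint8_t q = static_cast<uint8_t>(std::round(x * scale));  // quantize op
      float back = q / scale;                                   // dequantize op
      std::printf("x=%f q=%u back=%f err=%g\n", x, static_cast<unsigned>(q),
                  back, std::fabs(x - back));
    }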