From fc9e1347602116be87301008790b14b9d85e5f40 Mon Sep 17 00:00:00 2001
From: xiaolil1
Date: Thu, 15 Nov 2018 00:12:11 +0800
Subject: [PATCH] revert conv for pr

---
 paddle/fluid/operators/conv_mkldnn_op.cc | 246 +++++++++++------------
 1 file changed, 113 insertions(+), 133 deletions(-)

diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc
index e143c8411ef..f1ecfe41b96 100644
--- a/paddle/fluid/operators/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/conv_mkldnn_op.cc
@@ -18,8 +18,6 @@
 #include <unordered_map>
 #include <vector>
 
-#include "paddle/fluid/framework/data_layout_transform.h"
-
 namespace paddle {
 namespace operators {
 
@@ -118,6 +116,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler {
         "@data-weights_mem_p", pipeline);
   }
 
+
   std::shared_ptr<mkldnn::memory> AcquireResidualDataMemory(
       const mkldnn::memory::desc& md, void* ptr) {
     return this->AcquireMemory(md, ptr, "@user_residual_data_mem_p");
@@ -131,7 +130,7 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler {
         this->AcquireDstMemoryFromPrimitive(dst_ptr),
         "@residual_data_mem_p", pipeline);
   }
-  
+
   std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemoryFromDataPrimitive(
       void* ptr) {
     return this->AcquireMemoryFromPrimitive(
@@ -340,7 +339,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
     std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
     bool fuse_relu = ctx.Attr<bool>("fuse_relu");
-    bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
     bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
     int groups = ctx.Attr<int>("groups");
 
@@ -375,34 +373,31 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         src_tz, weights_tz, strides, paddings, dilations, groups,
         ctx.op().Output("Output"));
     const std::string key_conv_pd = key + "@conv_pd";
-    static std::unordered_map<std::string, std::vector<std::vector<float>>> scale_map;
+    static std::unordered_map<std::string, std::vector<float>> scale_map;
     //scale_map.insert({key_conv_pd,{1.0f}});
     //scale_map[key_conv_pd]={0.1f};
-    bool scale_reuse = true;
-    //auto scale_in_key = key + "@scale_in";
-    //auto scale_weights_key = key + "@scale_weights";
-    //auto scale_out_key = key + "@scale_out";
-    //auto output_shift_scale_key = key + "@output_shift_scale";
-    //auto sum_scale_key = key + "@sum_scale";
-    //auto scale_in_eltwise_key = key + "@scale_in_eltwise";
+    bool scale_reuse = false;
+    auto scale_in_key = key + "@scale_in";
+    auto scale_weights_key = key + "@scale_weights";
+    auto scale_out_key = key + "@scale_out";
+    auto output_shift_scale_key = key + "@output_shift_scale";
+    auto sum_scale_key = key + "@sum_scale";
+    auto scale_in_eltwise_key = key + "@scale_in_eltwise";
     std::vector<float> scale_in_data;
     std::vector<float> scale_out_data;
     std::vector<float> scale_weights_data;
-    std::vector<float> scale_in_eltwise_data = {1.0f};
+    std::vector<float> scale_in_eltwise_data;
     std::vector<float> output_shift_scale;
     std::vector<float> sum_scale = {1.0f};
-    std::vector<float> scale_bias_data = {1.0f};
-    std::vector<std::vector<float>> none_scale = {{0.0f}};
-    std::vector<std::vector<float>> scale_datas(7,{1.0f});
+    std::vector<float> none_scale = {0};
 
-    if (is_INT8 && GetScaleMap(scale_map, key) == none_scale){
-      scale_reuse = false;
-    } else{
-      scale_datas = GetScaleMap(scale_map, key);
+    if (is_INT8 && GetScaleMap(scale_map, scale_in_key) == none_scale){
+      scale_reuse = true;
     }
+//std::cout<<"scale_reuse = "<<scale_reuse<<std::endl;
 
     if(is_INT8){
-      if(!scale_reuse){
+      if(scale_reuse){
        int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
         scale_in_data = {*(scale_in->data<float>())};
         scale_weights_data.resize(count);
 #pragma omp parallel for if (count > 1)
         for(int i=0; i<count; i++){
           scale_weights_data[i] =*(scale_weights->data<float>() + i);
         }
         scale_out_data = {*(scale_out->data<float>())};
-        if(force_fp32_output)
-          scale_out_data[0] = 1.0;
         output_shift_scale.resize(count);
 #pragma omp parallel for if (count > 1)
         for(int i=0; i<count; i++){
           if(scale_weights_data[i] == 0.0)
             output_shift_scale[i] = scale_out_data[0];
           else
             output_shift_scale[i] =
                 scale_out_data[0] / (scale_in_data[0] * scale_weights_data[i]);
         }
         if(fuse_residual_conn){
           scale_in_eltwise_data = {*(scale_in_eltwise->data<float>())};
           sum_scale[0] = scale_out_data[0] / scale_in_eltwise_data[0];
+          SetScaleMap(scale_map, scale_in_eltwise_key, scale_in_eltwise_data);
         }
 
         //scale reuse
-        scale_datas[0] = scale_in_data;
-        scale_datas[1] = scale_in_eltwise_data;
-        scale_datas[2] = scale_weights_data;
-        scale_datas[4] = scale_out_data;
-        scale_datas[5] = output_shift_scale;
-        scale_datas[6] = sum_scale;
+        SetScaleMap(scale_map, scale_in_key, scale_in_data);
+        SetScaleMap(scale_map, scale_weights_key, scale_weights_data);
+        SetScaleMap(scale_map, scale_out_key, scale_out_data);
+        SetScaleMap(scale_map, output_shift_scale_key, output_shift_scale);
+        SetScaleMap(scale_map, sum_scale_key, sum_scale);
       } else{
-        scale_in_data = scale_datas[0];
-        scale_out_data = scale_datas[3];
-        scale_weights_data = scale_datas[2];
+        scale_in_data = GetScaleMap(scale_map, scale_in_key);
+        scale_out_data = GetScaleMap(scale_map, scale_out_key);
+        scale_weights_data = GetScaleMap(scale_map, scale_weights_key);
         if(fuse_residual_conn){
-          scale_in_eltwise_data = scale_datas[1];
+          scale_in_eltwise_data = GetScaleMap(scale_map, scale_in_eltwise_key);
         }
-        output_shift_scale = scale_datas[5];
-        sum_scale = scale_datas[6];
+        output_shift_scale = GetScaleMap(scale_map, output_shift_scale_key);
+        sum_scale = GetScaleMap(scale_map, sum_scale_key);
+        //printf("pause!!!");
       }
     }
 
-    std::shared_ptr<mkldnn::memory::desc> user_src_md;
-    std::shared_ptr<mkldnn::memory::desc> user_weights_md;
+    std::vector<mkldnn::primitive> pipeline;
 
-    user_src_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-        {src_tz}, paddle::framework::ToMKLDNNDataType(input->type()), input->format())));
-    user_weights_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-        {weights_tz}, platform::MKLDNNGetDataType<float>(),
-        (g == 1) ? mkldnn::memory::format::oihw : mkldnn::memory::format::goihw)));
-
+    auto user_src_md = platform::MKLDNNMemDesc(
+        {src_tz}, paddle::framework::ToMKLDNNDataType(input->type()), input->format());
+    auto user_weights_md = platform::MKLDNNMemDesc(
+        {weights_tz}, platform::MKLDNNGetDataType<float>(),
+        (g == 1) ? mkldnn::memory::format::oihw : mkldnn::memory::format::goihw);
 
     /* create memory descriptor for convolution without specified format
      * ('any') which lets a primitive (convolution in this case) choose
      * the memory format preferred for best performance
@@ -465,60 +458,53 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd;
     auto bias_tz = paddle::framework::vectorize2int(bias->dims());
-
-    std::shared_ptr<mkldnn::memory::desc> src_md;
-    std::shared_ptr<mkldnn::memory::desc> weights_md;
-    std::shared_ptr<mkldnn::memory::desc> dst_md;
-
     if(is_INT8){
-      src_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          src_tz, memory::data_type::u8, chosen_memory_format)));
-      weights_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          weights_tz, memory::data_type::s8, chosen_memory_format)));
-      auto dst_dt = fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) : paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char)));
-      if(fuse_residual_conn){
-        auto residual = ctx.Input<Tensor>("ResidualData");
-        auto residual_dt = paddle::framework::ToMKLDNNDataType(residual->type());
-        if(dst_dt != residual_dt)
-          dst_dt = residual_dt;
-      }
-      if(force_fp32_output)
-        dst_dt = paddle::framework::ToMKLDNNDataType(std::type_index(typeid(float)));
-      dst_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(dst_tz, dst_dt, chosen_memory_format)));
+      auto src_md = platform::MKLDNNMemDesc(
+          src_tz, memory::data_type::u8, chosen_memory_format);
+      auto weights_md = platform::MKLDNNMemDesc(
+          weights_tz, memory::data_type::s8,
+          (g == 1) ? chosen_memory_format : mkldnn::memory::format::goihw);
+      auto dst_dt = fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) : paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char)));
+      if(fuse_residual_conn){
+        auto residual = ctx.Input<Tensor>("ResidualData");
+        auto residual_dt = paddle::framework::ToMKLDNNDataType(residual->type());
+        if(dst_dt != residual_dt)
+          dst_dt = residual_dt;
+      }
+      auto dst_md = platform::MKLDNNMemDesc(dst_tz, dst_dt, chosen_memory_format);
 
       // create a conv primitive descriptor and save it for usage in backward
       if (bias) {
-        std::shared_ptr<mkldnn::memory::desc> bias_md;
-        bias_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-            bias_tz, memory::data_type::s32, memory::format::x)));
-
-        conv_pd = ConvFwdPrimitiveDesc(*src_md, *weights_md, *bias_md, *dst_md,
+        auto bias_md = platform::MKLDNNMemDesc(
+            bias_tz, memory::data_type::s32, memory::format::x);
+        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
                                        strides, paddings, mkldnn_engine,
                                        fuse_relu, fuse_residual_conn,
                                        output_shift_scale, sum_scale[0], is_test);
       } else {
         conv_pd =
-            ConvFwdPrimitiveDesc(*src_md, *weights_md, *dst_md, strides, paddings,
+            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
                                  mkldnn_engine, fuse_relu, fuse_residual_conn,
                                  output_shift_scale, sum_scale[0], is_test);
       }
     } else{
-      src_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format)));
-      weights_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          weights_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format)));
-      dst_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format)));
+      auto src_md = platform::MKLDNNMemDesc(
+          src_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+      auto weights_md = platform::MKLDNNMemDesc(
+          weights_tz, platform::MKLDNNGetDataType<T>(),
+          (g == 1) ? chosen_memory_format : mkldnn::memory::format::goihw);
+      auto dst_md = platform::MKLDNNMemDesc(
+          dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
 
       // create a conv primitive descriptor and save it for usage in backward
       if (bias) {
-        std::shared_ptr<mkldnn::memory::desc> bias_md;
-        bias_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-            bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x)));
-        conv_pd = ConvFwdPrimitiveDesc(*src_md, *weights_md, *bias_md, *dst_md,
-                                       strides, paddings, mkldnn_engine,
-                                       fuse_relu, fuse_residual_conn, is_test);
+        auto bias_md = platform::MKLDNNMemDesc(
+            bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
+        conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
+                                       strides, paddings, mkldnn_engine,
+                                       fuse_relu, fuse_residual_conn, is_test);
       } else {
-        conv_pd =
-            ConvFwdPrimitiveDesc(*src_md, *weights_md, *dst_md, strides, paddings,
+        conv_pd =
+            ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
                                  mkldnn_engine, fuse_relu, fuse_residual_conn,
                                  is_test);
       }
     }
@@ -527,10 +513,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key);
 
+
     // create mkldnn memory from input tensors (data/weights)
     auto user_src_memory_p =
-        handler.AcquireSrcMemory(*user_src_md, to_void_cast<T>(input_data));
+        handler.AcquireSrcMemory(user_src_md, to_void_cast<T>(input_data));
     auto user_weights_memory_p = handler.AcquireWeightsMemory(
-        *user_weights_md, to_void_cast<float>(filter_data));
+        user_weights_md, to_void_cast<float>(filter_data));
 
     // create reorder primitive if the input format is not the preferred one
     auto src_memory_p =
@@ -555,47 +542,42 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                        "same dimension sizes");
       auto residual_dt = paddle::framework::ToMKLDNNDataType(residual_param->type());
       if(residual_param->format() != handler.GetDstFormat()) {
-        std::shared_ptr<mkldnn::memory::desc> user_residual_md;
         auto residual_data_tz =
             paddle::framework::vectorize2int(residual_param->dims());
         auto residual_data_type =
            paddle::framework::ToMKLDNNDataType(residual_param->type());
-        user_residual_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-            residual_data_tz, residual_data_type, residual_param->format())));
+        auto user_residual_md = platform::MKLDNNMemDesc(
+            residual_data_tz, residual_data_type, residual_param->format());
         if(is_INT8){
-          PADDLE_ENFORCE(
-              force_fp32_output == false,
-              "Conv and sum does not support force_fp32_output");
           if(residual_dt == mkldnn::memory::data_type::u8){
             auto residual_param_data = residual_param->data<uint8_t>();
             auto user_residual_memory_p = handler.AcquireResidualDataMemory(
-                *user_residual_md, to_void_cast<uint8_t>(residual_param_data));
+                user_residual_md, to_void_cast<uint8_t>(residual_param_data));
             PADDLE_ENFORCE(
                 residual_param_data != nullptr,
                 "Provide data if you want MKLDNN conv+elementwise_add fusion");
             uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
             dst_memory_p =
                 handler.AcquireDstMemoryFromResidualDataMemory(
                     user_residual_memory_p, to_void_cast<uint8_t>(output_data), pipeline);
           } else{
             auto residual_param_data = residual_param->data<int8_t>();
             auto user_residual_memory_p = handler.AcquireResidualDataMemory(
-                *user_residual_md, to_void_cast<int8_t>(residual_param_data));
+                user_residual_md, to_void_cast<int8_t>(residual_param_data));
             PADDLE_ENFORCE(
                 residual_param_data != nullptr,
                 "Provide data if you want MKLDNN conv+elementwise_add fusion");
             int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
             dst_memory_p =
                 handler.AcquireDstMemoryFromResidualDataMemory(
                     user_residual_memory_p, to_void_cast<int8_t>(output_data), pipeline);
             if(fuse_relu)
               need_s8_to_u8 = true;
           }
         } else{
           auto residual_param_data = residual_param->data<T>();
           auto user_residual_memory_p = handler.AcquireResidualDataMemory(
-              *user_residual_md, to_void_cast<T>(residual_param_data));
+              user_residual_md, to_void_cast<T>(residual_param_data));
           PADDLE_ENFORCE(
               residual_param_data != nullptr,
               "Provide data if you want MKLDNN conv+elementwise_add fusion");
@@ -608,6 +590,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         output->ShareDataWith(*residual_param);
         if(is_INT8){
           if(residual_dt == mkldnn::memory::data_type::u8){
+
             uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
             dst_memory_p =
                 handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
@@ -625,7 +608,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         }
       }
     } else {
-      if(is_INT8 && !force_fp32_output){
+      if(is_INT8){
        if(fuse_relu){
           uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace(), handler.GetDstMemorySize());
           dst_memory_p =
@@ -645,29 +628,27 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     // create convolution op primitive
     std::shared_ptr<mkldnn::convolution_forward> conv_p;
+    std::vector<float> scale_bias_data;
+    auto scale_bias_key = key + "@scale_bias";
     if (bias) {
       const float* bias_data = bias->data<float>();
-      std::shared_ptr<mkldnn::memory::desc> user_bias_md;
-      user_bias_md.reset(new mkldnn::memory::desc(platform::MKLDNNMemDesc(
-          {bias_tz}, platform::MKLDNNGetDataType<float>(), memory::format::x)));
+      auto user_bias_md = platform::MKLDNNMemDesc(
+          {bias_tz}, platform::MKLDNNGetDataType<float>(), memory::format::x);
       auto user_bias_memory_p =
-          handler.AcquireBiasMemory(*user_bias_md, to_void_cast<float>(bias_data));
+          handler.AcquireBiasMemory(user_bias_md, to_void_cast<float>(bias_data));
       std::shared_ptr<mkldnn::memory> bias_memory_p;
       if(is_INT8){
         int mask_reorder = is_multi_channel? 1<<0 : 1;
-        if(!scale_reuse){
+        if(scale_reuse){
           int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
           scale_bias_data.resize(count);
 #pragma omp parallel for if (count > 1)
           for(int i=0; i<count; i++){
             scale_bias_data[i] = (*scale_in->data<float>()) * (*scale_weights->data<float>() + i);
           }
+          SetScaleMap(scale_map, scale_bias_key, scale_bias_data);
         } else{
-          scale_bias_data = scale_datas[3];
+          scale_bias_data = GetScaleMap(scale_map, scale_bias_key);
         }
       }
       conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
                                           bias_memory_p, dst_memory_p);
     } else {
       conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
                                           dst_memory_p);
     }
-    SetScaleMap(scale_map, key, scale_datas);
 
     // push primitive to stream and wait until it's executed
     pipeline.push_back(*conv_p);
 
     stream(stream::kind::eager).submit(pipeline).wait();
 
-    if(need_s8_to_u8 && !force_fp32_output){
+    if(need_s8_to_u8){
       output->mutable_data<uint8_t>(ctx.GetPlace());
     }
@@ -698,24 +678,24 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
  private:
-  void SetScaleMap(std::unordered_map<std::string, std::vector<std::vector<float>>> &scale_map,
-                   const std::string& name, std::vector<std::vector<float>> scale_datas) const {
+  void SetScaleMap(std::unordered_map<std::string, std::vector<float>> &scale_map,
+                   const std::string& name, std::vector<float> scale_data) const {
     auto it = scale_map.find(name);
     if (it == scale_map.end()) {
-      scale_map[name] = scale_datas;  // create new blob
+      scale_map[name] = scale_data;  // create new blob
     } else {
-      (*it).second = scale_datas;  // set data to existing blob
+      (*it).second = scale_data;  // set data to existing blob
     }
     return;
  }
 
-  std::vector<std::vector<float>> GetScaleMap(std::unordered_map<std::string, std::vector<std::vector<float>>> scale_map,
+  std::vector<float> GetScaleMap(std::unordered_map<std::string, std::vector<float>> &scale_map,
                                  const std::string& name) const {
     auto it = scale_map.find(name);
     if (it != scale_map.end()) {
       return (*it).second;
    }
-    return {{0.0f}};
+    return {0};
   }
 
   mkldnn::primitive_attr CreatePostOps(bool fuse_relu, bool fuse_residual_conn,
-- 
GitLab
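
Below is a minimal, self-contained C++ sketch of the per-key scale cache that this revert restores; it is illustrative only, not code from the patch. The two helpers mirror the SetScaleMap/GetScaleMap bodies at the bottom of the diff (with {0} as the "not cached yet" sentinel), while the key string, the calibration values, and the main() driver are hypothetical. It shows a first run computing the INT8 requantization factors, output_shift_scale[i] = scale_out / (scale_in * scale_weights[i]), storing them under per-key entries, and a later run reading them back without recomputation.

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

// Mirrors the reverted helper: one std::vector<float> per string key.
static void SetScaleMap(
    std::unordered_map<std::string, std::vector<float>>& scale_map,
    const std::string& name, std::vector<float> scale_data) {
  scale_map[name] = scale_data;
}

// Mirrors the reverted helper: {0} signals "no cached entry for this key".
static std::vector<float> GetScaleMap(
    std::unordered_map<std::string, std::vector<float>>& scale_map,
    const std::string& name) {
  auto it = scale_map.find(name);
  if (it != scale_map.end()) {
    return it->second;
  }
  return {0};
}

int main() {
  std::unordered_map<std::string, std::vector<float>> scale_map;
  const std::string key = "conv2d_0";  // hypothetical op hash

  // First run: the lookup misses, so scales are computed and cached.
  bool compute = GetScaleMap(scale_map, key + "@scale_in") ==
                 std::vector<float>{0};
  if (compute) {
    std::vector<float> scale_in = {2.0f};            // hypothetical calibration
    std::vector<float> scale_weights = {4.0f, 8.0f}; // per output channel
    std::vector<float> scale_out = {16.0f};

    // INT8 requantization factor per output channel:
    //   output_shift_scale[i] = scale_out / (scale_in * scale_weights[i])
    std::vector<float> output_shift_scale(scale_weights.size());
    for (size_t i = 0; i < scale_weights.size(); ++i) {
      output_shift_scale[i] = scale_out[0] / (scale_in[0] * scale_weights[i]);
    }

    SetScaleMap(scale_map, key + "@scale_in", scale_in);
    SetScaleMap(scale_map, key + "@output_shift_scale", output_shift_scale);
  }

  // Later runs: the cached values come back without touching the
  // calibration tensors again; 16 / (2 * 4) == 2.
  assert(GetScaleMap(scale_map, key + "@output_shift_scale")[0] == 2.0f);
  return 0;
}

Caching under key + "@..." strings keeps repeated forward passes of the same operator from re-reading the calibration tensors; the trade-off visible in the diff is one map lookup per scale vector, instead of the single lookup for all seven vectors that the reverted scale_datas layout used.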