diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index 0393daf4ac36ab1a1d7531fed0895e0f2b174d28..a396a7aec211238e2ed3bd62e2348842fb467c60 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -15,6 +15,8 @@ #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/framework/data_layout_transform.h" +#include +#include namespace paddle { namespace operators { @@ -346,36 +348,76 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { } std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); + // Get unique name for storing MKLDNN primitives + const std::string key = ConvMKLDNNHandler::GetHash( + src_tz, weights_tz, strides, paddings, dilations, groups, + ctx.op().Output("Output")); + const std::string key_conv_pd = key + "@conv_pd"; + static std::unordered_map> scale_map; + //scale_map.insert({key_conv_pd,{1.0f}}); + //scale_map[key_conv_pd]={0.1f}; + bool scale_reuse = false; + auto scale_in_key = key + "@scale_in"; + auto scale_weights_key = key + "@scale_weights"; + auto scale_out_key = key + "@scale_out"; + auto output_shift_scale_key = key + "@output_shift_scale"; + auto sum_scale_key = key + "@sum_scale"; + auto scale_in_eltwise_key = key + "@scale_in_eltwise"; + std::vector scale_in_data; + std::vector scale_out_data; + std::vector scale_weights_data; + std::vector scale_in_eltwise_data; std::vector output_shift_scale; - float sum_scale = 1.0f; + std::vector sum_scale = {1.0f}; + std::vector none_scale = {0}; + if (is_INT8 && GetScaleMap(scale_map, scale_in_key) == none_scale){ + scale_reuse = true; + } +//std::cout<<"scale_reuse = "<1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1; - float scale_in_data = *(scale_in->data()); - - std::vector scale_weights_data(count); - for(int i=0; idata() + i); - } - float scale_out_data = *(scale_out->data()); - output_shift_scale.resize(count); - for(int i=0; idata()); - sum_scale = scale_out_data / scale_in_eltwise_data; + if(scale_reuse){ +//std::cout<<"load scale!!!!!!!!"<1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1; + scale_in_data = {*(scale_in->data())}; + scale_weights_data.resize(count); + for(int i=0; idata() + i); + } + scale_out_data = {*(scale_out->data())}; + output_shift_scale.resize(count); + for(int i=0; idata())}; + sum_scale[0] = scale_out_data[0] / scale_in_eltwise_data[0]; + SetScaleMap(scale_map, scale_in_eltwise_key, scale_in_eltwise_data); + } + + //scale reuse + SetScaleMap(scale_map, scale_in_key, scale_in_data); + SetScaleMap(scale_map, scale_weights_key, scale_weights_data); + SetScaleMap(scale_map, scale_out_key, scale_out_data); + SetScaleMap(scale_map, output_shift_scale_key, output_shift_scale); + SetScaleMap(scale_map, sum_scale_key, sum_scale); + } else{ + scale_in_data = GetScaleMap(scale_map, scale_in_key); + scale_out_data = GetScaleMap(scale_map, scale_out_key); + scale_weights_data = GetScaleMap(scale_map, scale_weights_key); + if(fuse_residual_conn){ + scale_in_eltwise_data = GetScaleMap(scale_map, scale_in_eltwise_key); + } + output_shift_scale = GetScaleMap(scale_map, output_shift_scale_key); + sum_scale = GetScaleMap(scale_map, sum_scale_key); + //printf("pause!!!"); } + } - // Get unique name for storing MKLDNN primitives - const std::string key = ConvMKLDNNHandler::GetHash( - src_tz, weights_tz, strides, paddings, dilations, groups, - ctx.op().Output("Output")); - const std::string key_conv_pd = key + "@conv_pd"; std::vector pipeline; auto user_src_md = platform::MKLDNNMemDesc( @@ -431,7 +473,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine, fuse_relu, fuse_residual_conn, - output_shift_scale, sum_scale); + output_shift_scale, sum_scale[0]); } else{ conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, strides, paddings, mkldnn_engine, @@ -442,7 +484,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings, mkldnn_engine, fuse_relu, fuse_residual_conn, - output_shift_scale, sum_scale); + output_shift_scale, sum_scale[0]); } else{ conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings, @@ -466,11 +508,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { std::shared_ptr weights_memory_p; if(is_INT8){ int mask_reorder = is_multi_channel? ((g!= 1) ? (1<<1)+(1<<0) : 1<<0) : 0; - int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1; - std::vector scale_weights_data(count); - for(int i=0; idata() + i); - } weights_memory_p = handler.AcquireWeightsMemoryFromPrimitive( user_weights_memory_p, pipeline, is_test, is_INT8, scale_weights_data, mask_reorder); } else{ @@ -536,6 +573,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { // create convolution op primitive std::shared_ptr conv_p; + std::vector scale_bias_data; + auto scale_bias_key = key + "@scale_bias"; if (bias) { const float* bias_data = bias->data(); auto user_bias_md = platform::MKLDNNMemDesc( @@ -545,10 +584,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { std::shared_ptr bias_memory_p; if(is_INT8){ int mask_reorder = is_multi_channel? 1<<0 : 1; - int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1; - std::vector scale_bias_data(count); - for(int i=0; idata()) * (*(scale_weights->data() + i)); + if(scale_reuse){ + int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1; + scale_bias_data.resize(count); + for(int i=0; i { } private: + + void SetScaleMap(std::unordered_map> &scale_map, + const std::string& name, std::vector scale_data) const { + auto it = scale_map.find(name); + if (it == scale_map.end()) { + scale_map[name] = scale_data; // create new blob + } else { + (*it).second = scale_data; // set data to existing blob + } + return; + } + + std::vector GetScaleMap(std::unordered_map> &scale_map, + const std::string& name) const { + auto it = scale_map.find(name); + if (it != scale_map.end()) { + return (*it).second; + } + return {0}; + } + mkldnn::primitive_attr CreatePostOps(bool fuse_relu, bool fuse_residual_conn, const std::vector output_shift_scale, float sum_scale) const { mkldnn::primitive_attr conv_attr;