Commit 782954b4 authored by xiaoli.liu@intel.com

Merge branch 'prv-calibration'

@@ -106,12 +106,21 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
 
     // Get unique name for storing MKLDNN primitives
-    const std::string key = platform::ConvMKLDNNHandler::GetHash(
-        src_tz, weights_tz, strides, paddings, dilations, groups,
-        ctx.op().Output("Output"));
+    const int MaxKeyLength = 256;
+    std::string key;
+    key.reserve(MaxKeyLength);
+    AppendKey(key, src_tz, weights_tz, strides, paddings, dilations, groups,
+              ctx.op().Output("Output"));
+    //const std::string key = platform::ConvMKLDNNHandler::GetHash(
+    //    src_tz, weights_tz, strides, paddings, dilations, groups,
+    //    ctx.op().Output("Output"));
     const std::string key_conv_pd = key + "@conv_pd";
 
-    bool is_INT8 = ctx.HasInput("Scale_in")? true : false;
+    bool is_INT8 = false;
+    mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type());
+    if(src_dt == mkldnn::memory::data_type::u8 || src_dt == mkldnn::memory::data_type::s8){
+      is_INT8 = true;
+    }
     bool need_s8_to_u8 = false;
 
     if (fuse_residual_conn && is_INT8 && fuse_relu) {
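This hunk switches INT8 detection from the presence of a "Scale_in" input to the input tensor's MKL-DNN data type (u8/s8). A minimal standalone sketch of that check; the DataType enum below only stands in for mkldnn::memory::data_type, which the real kernel obtains via paddle::framework::ToMKLDNNDataType(input->type()):

    #include <iostream>

    // Stand-in for mkldnn::memory::data_type in this sketch.
    enum class DataType { f32, s32, s8, u8 };

    // INT8 execution is chosen when the input tensor is already quantized.
    bool IsInt8(DataType src_dt) {
      return src_dt == DataType::u8 || src_dt == DataType::s8;
    }

    int main() {
      std::cout << std::boolalpha
                << IsInt8(DataType::u8) << "\n"    // true: quantized input
                << IsInt8(DataType::f32) << "\n";  // false: fp32 path
    }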
@@ -336,9 +345,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     bool is_multi_channel = (scale_weights->memory_size() > 1) ? true : false;
-
-    static std::unordered_map<std::string, std::vector<float>> scale_map;
-    bool scale_reuse = true;
     auto scale_in_key = key + "@scale_in";
     auto scale_weights_key = key + "@scale_weights";
     auto scale_out_key = key + "@scale_out";
@@ -353,49 +359,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<float> sum_scale = {1.0f};
     std::vector<float> none_scale = {0};
-    if (GetScaleMap(scale_map, scale_in_key) == none_scale){
-      scale_reuse = false;
-    }
-    if(!scale_reuse){
-      int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
-      scale_in_data = {*(scale_in->data<float>())};
-      scale_weights_data.resize(count);
-      #pragma omp parallel for if (count > 1)
-      for(int i=0; i<count; i++){
-        scale_weights_data[i] =*(scale_weights->data<float>() + i);
-      }
-      if(!force_fp32_output)
-        scale_out_data = {*(scale_out->data<float>())};
-      output_shift_scale.resize(count);
-      #pragma omp parallel for if (count > 1)
-      for(int i=0; i<count; i++){
-        if(scale_weights_data[i] == 0.0)
-          output_shift_scale[i] = scale_out_data[0];
-        else
-          output_shift_scale[i] = scale_out_data[0] / (scale_in_data[0] * scale_weights_data[i]);
-      }
-      if(fuse_residual_conn){
-        scale_in_eltwise_data = {*(scale_in_eltwise->data<float>())};
-        sum_scale[0] = scale_out_data[0] / scale_in_eltwise_data[0];
-        SetScaleMap(scale_map, scale_in_eltwise_key, scale_in_eltwise_data);
-      }
-      //scale reuse
-      SetScaleMap(scale_map, scale_in_key, scale_in_data);
-      SetScaleMap(scale_map, scale_weights_key, scale_weights_data);
-      SetScaleMap(scale_map, scale_out_key, scale_out_data);
-      SetScaleMap(scale_map, output_shift_scale_key, output_shift_scale);
-      SetScaleMap(scale_map, sum_scale_key, sum_scale);
-    } else{
-      scale_in_data = GetScaleMap(scale_map, scale_in_key);
-      scale_out_data = GetScaleMap(scale_map, scale_out_key);
-      scale_weights_data = GetScaleMap(scale_map, scale_weights_key);
-      if(fuse_residual_conn){
-        scale_in_eltwise_data = GetScaleMap(scale_map, scale_in_eltwise_key);
-      }
-      output_shift_scale = GetScaleMap(scale_map, output_shift_scale_key);
-      sum_scale = GetScaleMap(scale_map, sum_scale_key);
-    }
+    int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
+    scale_in_data = {*(scale_in->data<float>())};
+    scale_weights_data.resize(count);
+    #pragma omp parallel for if (count > 1)
+    for(int i=0; i<count; i++){
+      scale_weights_data[i] =*(scale_weights->data<float>() + i);
+    }
+    if(!force_fp32_output)
+      scale_out_data = {*(scale_out->data<float>())};
+    output_shift_scale.resize(count);
+    #pragma omp parallel for if (count > 1)
+    for(int i=0; i<count; i++){
+      if(scale_weights_data[i] == 0.0)
+        output_shift_scale[i] = scale_out_data[0];
+      else
+        output_shift_scale[i] = scale_out_data[0] / (scale_in_data[0] * scale_weights_data[i]);
+    }
+    if(fuse_residual_conn){
+      scale_in_eltwise_data = {*(scale_in_eltwise->data<float>())};
+      sum_scale[0] = scale_out_data[0] / scale_in_eltwise_data[0];
+    }
 
     std::vector<primitive> pipeline;
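The hunk above drops the static scale_map cache and computes the requantization factors inline on every call: output_shift_scale[i] = scale_out / (scale_in * scale_weights[i]) per output channel, and sum_scale = scale_out / scale_in_eltwise for the fused residual input. A minimal standalone sketch of that arithmetic, with made-up scale values (names mirror the diff):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
      float scale_in = 0.5f;                                  // input scale
      std::vector<float> scale_weights = {2.0f, 4.0f, 0.0f};  // per output channel
      float scale_out = 8.0f;
      float scale_in_eltwise = 2.0f;                          // residual input scale

      // output_shift_scale[i] = scale_out / (scale_in * scale_weights[i]),
      // falling back to scale_out when the weight scale is zero, as the kernel does.
      std::vector<float> output_shift_scale(scale_weights.size());
      for (std::size_t i = 0; i < scale_weights.size(); ++i)
        output_shift_scale[i] = scale_weights[i] == 0.0f
            ? scale_out
            : scale_out / (scale_in * scale_weights[i]);

      // sum_scale rescales the fused residual input to the output scale.
      float sum_scale = scale_out / scale_in_eltwise;

      for (float s : output_shift_scale) std::cout << s << ' ';  // prints: 8 4 8
      std::cout << "sum_scale=" << sum_scale << '\n';            // sum_scale=4
    }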
@@ -516,16 +499,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         handler->AcquireBiasMemory(user_bias_md, to_void_cast<float>(bias_data));
     std::shared_ptr<mkldnn::memory> bias_memory_p;
     int mask_reorder = is_multi_channel? 1<<0 : 1;
-    if(!scale_reuse){
-      int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
-      scale_bias_data.resize(count);
-      #pragma omp parallel for if (count > 1)
-      for(int i=0; i<count; i++){
-        scale_bias_data[i] = scale_in_data[0] * scale_weights_data[i];
-      }
-      SetScaleMap(scale_map, scale_bias_key, scale_bias_data);
-    } else{
-      scale_bias_data = GetScaleMap(scale_map, scale_bias_key);
-    }
+    int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
+    scale_bias_data.resize(count);
+    #pragma omp parallel for if (count > 1)
+    for(int i=0; i<count; i++){
+      scale_bias_data[i] = scale_in_data[0] * scale_weights_data[i];
+    }
     bias_memory_p =
         handler->AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_test, is_INT8, scale_bias_data, mask_reorder);
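The bias reorder scale is the product of the input scale and each per-channel weight scale. A small sketch of that computation, as a hypothetical free function standing in for the inline loop above:

    #include <cstddef>
    #include <vector>

    // scale_bias[i] = scale_in * scale_weights[i], one entry per output channel
    // (a single entry when the weight scale is not per-channel).
    std::vector<float> BiasScales(float scale_in,
                                  const std::vector<float>& scale_weights) {
      std::vector<float> scale_bias(scale_weights.size());
      for (std::size_t i = 0; i < scale_weights.size(); ++i)
        scale_bias[i] = scale_in * scale_weights[i];
      return scale_bias;  // e.g. scale_in = 0.5, weights {2, 4} -> {1, 2}
    }

    int main() { return BiasScales(0.5f, {2.0f, 4.0f})[0] == 1.0f ? 0 : 1; }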
@@ -564,25 +542,35 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   }
 
  private:
-  void SetScaleMap(std::unordered_map<std::string, std::vector<float>> &scale_map,
-                   const std::string& name, std::vector<float> scale_data) const {
-    auto it = scale_map.find(name);
-    if (it == scale_map.end()) {
-      scale_map[name] = scale_data;  // create new blob
-    } else {
-      (*it).second = scale_data;  // set data to existing blob
-    }
-    return;
-  }
-  std::vector<float> GetScaleMap(std::unordered_map<std::string, std::vector<float>> &scale_map,
-                                 const std::string& name) const {
-    auto it = scale_map.find(name);
-    if (it != scale_map.end()) {
-      return (*it).second;
-    }
-    return {0};
-  }
+  void AppendKey(std::string& key, mkldnn::memory::dims& input_dims,  // NOLINT
+                 mkldnn::memory::dims& weights_dims,  // NOLINT
+                 std::vector<int>& strides,  // NOLINT
+                 std::vector<int>& paddings,  // NOLINT
+                 std::vector<int>& dilations,  // NOLINT
+                 int groups, const std::string& suffix) const {
+    AppendKeyDims(key, input_dims);
+    AppendKeyDims(key, weights_dims);
+    AppendKeyVec(key, strides);
+    AppendKeyVec(key, paddings);
+    AppendKeyVec(key, dilations);
+    AppendKey(key, std::to_string(groups));
+    AppendKey(key, suffix);
+  }
+  void AppendKeyDims(std::string& key, const mkldnn::memory::dims& dims) const {
+    for(unsigned int i=0; i<dims.size(); i++){
+      AppendKey(key, std::to_string(dims[i]));
+    }
+  }
+  void AppendKeyVec(std::string& key, const std::vector<int>& dims) const {
+    for(unsigned int i=0; i<dims.size(); i++){
+      AppendKey(key, std::to_string(dims[i]));
+    }
+  }
+  void AppendKey(std::string& key, const std::string& s) const {
+    key.append(s);
+  }
 
   mkldnn::primitive_attr CreatePostOps(bool fuse_relu, bool fuse_residual_conn,
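With these helpers, the primitive cache key is simply the decimal rendering of every shape, stride, padding, and dilation value plus the group count and the output name, concatenated into a pre-reserved string. A standalone sketch of what such a key looks like; the free functions mirror the private helpers above, and the example dims and output name are made up:

    #include <iostream>
    #include <string>
    #include <vector>

    void AppendKey(std::string& key, const std::string& s) { key.append(s); }

    // mkldnn::memory::dims is a std::vector<int>-like type, so one helper
    // covers both AppendKeyDims and AppendKeyVec in this sketch.
    void AppendKeyVec(std::string& key, const std::vector<int>& dims) {
      for (int d : dims) AppendKey(key, std::to_string(d));
    }

    int main() {
      std::string key;
      key.reserve(256);                     // MaxKeyLength in the kernel
      AppendKeyVec(key, {1, 3, 224, 224});  // src_tz (hypothetical)
      AppendKeyVec(key, {64, 3, 7, 7});     // weights_tz (hypothetical)
      AppendKeyVec(key, {2, 2});            // strides
      AppendKey(key, std::to_string(1));    // groups
      AppendKey(key, "conv1.tmp_0");        // output name suffix (hypothetical)
      std::cout << key << "\n";             // 1322422464377221conv1.tmp_0
    }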