Commit edc53a0d authored by xiaolil1

clean code for int8

Parent f35d8ea8
@@ -275,7 +275,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   void Compute(const paddle::framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
-    std::cout<<"this is conv kernel op....................."<<std::endl;
     const bool is_test = ctx.Attr<bool>("is_test");
     auto& dev_ctx =
@@ -320,8 +319,6 @@ std::cout<<"this is conv kernel op....................."<<std::endl;
     bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
     int groups = ctx.Attr<int>("groups");
-    std::cout<<"fuse_relu = "<<fuse_relu<<" fuse_residual_conn = "<<fuse_residual_conn<<std::endl;
     // TODO(tpatejko): add support for dilation
     PADDLE_ENFORCE(
         dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
@@ -351,36 +348,15 @@ std::cout<<"fuse_relu = "<<fuse_relu<<" fuse_residual_conn = "<<fuse_residual_c
     std::vector<float> output_shift_scale;
     float sum_scale = 1.0f;
-    for(int i=0; i<50; i++){
-      printf("%f ", (float)*(input_data+i));
-    }
-    printf("\n");fflush(stdout);
     if(is_INT8){
-      std::cout<<"this is conv int8 op .............."<<std::endl;
-      //const uint8_t* input_data_int8 = input->data<uint8_t>(); //FIX ME XIAOLI
-      //unsigned char* a = (unsigned char*)(input_data);
-      //for(int i=0; i<50; i++){
-      //  printf("%d ", *(a+i));
-      //}
-      //printf("\n");
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
       float scale_in_data = *(scale_in->data<float>());
-      for(int i=0; i<50; i++){
-        printf("%f ", *(input_data+i)/scale_in_data);
-      }
-      printf("\n");fflush(stdout);
-      std::cout<<"scale_in = "<<scale_in_data<<std::endl;
       std::vector<float> scale_weights_data(count);
       for(int i=0; i<count; i++){
         scale_weights_data[i] =*(scale_weights->data<float>() + i);
       }
       float scale_out_data = *(scale_out->data<float>());
-      std::cout<<"scale_out = "<<scale_out_data<<std::endl;
       output_shift_scale.resize(count);
       for(int i=0; i<count; i++){
         if(scale_weights_data[i] == 0.0)
@@ -391,7 +367,6 @@ std::cout<<"scale_out = "<<scale_out_data<<std::endl;
       if(fuse_residual_conn){
         float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
         sum_scale = scale_out_data / scale_in_eltwise_data;
-        std::cout<<"scale_in_eltwise_data = "<<scale_in_eltwise_data<<" scale_out_data = "<<scale_out_data<<" sum_scale = "<<sum_scale<<std::endl;
       }
     }
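Note: the hunks above trim debug prints around the INT8 scale setup; the
loop body elided by the diff view computes the per-channel requantization
factors. A minimal sketch of the arithmetic, assuming the elided lines
mirror the zero-check visible at the end of the previous hunk (a
reconstruction, not the verbatim source):

    // The conv accumulates (x*scale_in) * (w*scale_w[i]) in int32, so
    // scaling the raw accumulator by scale_out / (scale_in * scale_w[i])
    // lands the result in the output quantization domain.
    for (int i = 0; i < count; i++) {
      output_shift_scale[i] =
          (scale_weights_data[i] == 0.0f)
              ? scale_out_data  // assumed fallback for an all-zero channel
              : scale_out_data / (scale_in_data * scale_weights_data[i]);
    }
    // With fuse_residual_conn, the residual input arrives in
    // scale_in_eltwise units and is folded in with:
    //   sum_scale = scale_out_data / scale_in_eltwise_data;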
@@ -401,8 +376,6 @@ std::cout<<"scale_in_eltwise_data = "<<scale_in_eltwise_data<<" scale_out_data =
                                            ctx.op().Output("Output"));
     const std::string key_conv_pd = key + "@conv_pd";
-    std::cout<<"current op is = "<<key_conv_pd<<std::endl;
     std::vector<primitive> pipeline;
     auto user_src_md = platform::MKLDNNMemDesc(
         {src_tz}, paddle::framework::ToMKLDNNDataType(input->type()), input->format());
...@@ -440,13 +413,7 @@ std::cout<<"current op is = "<<key_conv_pd<<std::endl; ...@@ -440,13 +413,7 @@ std::cout<<"current op is = "<<key_conv_pd<<std::endl;
if(dst_dt != residual_dt) if(dst_dt != residual_dt)
dst_dt = residual_dt; dst_dt = residual_dt;
} }
dst_md = platform::MKLDNNMemDesc( dst_md = platform::MKLDNNMemDesc(dst_tz, dst_dt, chosen_memory_format);
dst_tz,// memory::data_type::f32, chosen_memory_format);
dst_dt,//paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))),
chosen_memory_format);
//fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
//paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char))),
//chosen_memory_format);
} }
// create a conv primitive descriptor and save it for usage in backward // create a conv primitive descriptor and save it for usage in backward
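Note: the comments deleted in the hunk above document how dst_dt is meant to
be chosen for INT8 outputs: u8 when ReLU is fused (activations are
non-negative), s8 otherwise, overridden by the residual tensor's type when a
residual connection is fused. A hypothetical reconstruction of that policy:

    // Assumed selection logic, pieced together from the deleted comments:
    auto dst_dt = fuse_relu
        ? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char)))
        : paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char)));
    if (fuse_residual_conn && dst_dt != residual_dt) dst_dt = residual_dt;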
@@ -495,8 +462,7 @@ std::cout<<"current op is = "<<key_conv_pd<<std::endl;
     // create reorder primitive if the input format is not the preferred one
     auto src_memory_p =
         handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
-    std::shared_ptr<mkldnn::memory> weights_memory_p;// = handler.AcquireWeightsMemoryFromPrimitive(
-        //user_weights_memory_p, pipeline, is_test);
+    std::shared_ptr<mkldnn::memory> weights_memory_p;
     if(is_INT8){
       int mask_reorder = is_multi_channel? ((g!= 1) ? (1<<1)+(1<<0) : 1<<0) : 0;
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
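Note: mask_reorder follows the MKL-DNN 0.x output-scales convention: bit k
set means the scale varies along tensor dimension k. A short gloss of the
three cases in the hunk:

    // goihw weights (grouped conv): per-channel scales vary along dims 0
    // (group) and 1 (output channel) -> bits 0 and 1 set.
    // oihw weights: scales vary along dim 0 only -> bit 0 set.
    // A single tensor-wide scale uses mask 0.
    int mask_reorder =
        is_multi_channel ? ((g != 1) ? (1 << 1) + (1 << 0) : 1 << 0) : 0;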
@@ -515,48 +481,18 @@ std::cout<<"current op is = "<<key_conv_pd<<std::endl;
     if(is_INT8){
       if (fuse_residual_conn) {
         auto residual_param = ctx.Input<Tensor>("ResidualData");
-        //auto residual_param_data = residual_param->data<T>();
         PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
                           "Output and elementwise parameter need to have the "
                           "same dimension sizes");
-        //std::cout<<"output = "<<output<<" residual_param = "<<residual_param<<std::endl;
         output->ShareDataWith(*residual_param);
         auto residual_dt = paddle::framework::ToMKLDNNDataType(residual_param->type());
         if(residual_dt == mkldnn::memory::data_type::u8){
           uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
-          //std::cout<<"after share output = "<<output<<" residual_param = "<<residual_param<<std::endl;
-          //float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
-          printf("residual is u8: this is bottom 1 data\n");
-          //unsigned char* f = (unsigned char*)(residual_param_data);
-          //for(int i=0; i<50; i++){
-          //  printf("%f ", (float)f[i]/scale_in_eltwise_data);
-          //}
-          //printf("\n");
-          //printf("this is output data\n");
-          //unsigned char* e = (unsigned char*)(output_data);
-          //for(int i=0; i<50; i++){
-          //  printf("%f ", (float)e[i]/scale_in_eltwise_data);
-          //}
-          //printf("\n");
           dst_memory_p =
               handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
         } else{
           int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
-          //std::cout<<"after share output = "<<output<<" residual_param = "<<residual_param<<std::endl;
-          printf("residual is s8 : this is bottom 1 data\n");
-          //char* f = (char*)(residual_param_data);
-          //for(int i=0; i<50; i++){
-          //  printf("%f ", (float)f[i]);
-          //}
-          //printf("\n");
-          //printf("this is output data\n");
-          //char* e = (char*)(output_data);
-          //for(int i=0; i<50; i++){
-          //  printf("%f ", (float)e[i]);
-          //}
-          //printf("\n");
           dst_memory_p =
               handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
         }
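Note: with fuse_residual_conn the convolution sums in place:
output->ShareDataWith(*residual_param) points the output at the residual
buffer, so the destination memory must be created with the residual's
element type (u8 vs s8), or the primitive would read the buffer with the
wrong element width. The sum_scale computed earlier is consumed through a
post-op; a sketch of that wiring, assuming the usual MKL-DNN 0.x post-ops
API (the actual call sits outside this hunk):

    // dst := sum_scale * dst + conv_result, then optional fused ReLU.
    mkldnn::post_ops ops;
    ops.append_sum(sum_scale);
    if (fuse_relu)
      ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f);
    mkldnn::primitive_attr attr;
    attr.set_post_ops(ops);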
@@ -571,7 +507,6 @@ printf("residual is s8 : this is bottom 1 data\n");
               handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
         }
       }
-      std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<" dst fmt = "<<dst_memory_p->get_primitive_desc().desc().data.format<<" dst dt = "<<dst_memory_p->get_primitive_desc().desc().data.data_type<<std::endl;
     } else{
       T* output_data = nullptr;
       if (fuse_residual_conn) {
@@ -603,8 +538,7 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework:
           {bias_tz}, platform::MKLDNNGetDataType<float>(), memory::format::x);
       auto user_bias_memory_p =
           handler.AcquireBiasMemory(user_bias_md, to_void_cast<float>(bias_data));
-      std::shared_ptr<mkldnn::memory> bias_memory_p;// =
-          //handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
+      std::shared_ptr<mkldnn::memory> bias_memory_p;
       if(is_INT8){
         int mask_reorder = is_multi_channel? 1<<0 : 1;
         int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
@@ -632,12 +566,6 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework:
     output->set_layout(DataLayout::kMKLDNN);
     output->set_format(GetMKLDNNFormat(*dst_memory_p));
-    //if(is_INT8){
-    //  uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace()); //work aroud forsum fusion
-    //  std::cout<<"output_data = "<<output_data<<std::endl;
-    //}
-    std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<" dst fmt = "<<dst_memory_p->get_primitive_desc().desc().data.format<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<"dst dt = "<<dst_memory_p->get_primitive_desc().desc().data.data_type<<std::endl;
-    std::cout<<"this is conv end!!!!!!!!!!!!!!!!!!!!"<<std::endl;
   }
 private:
......
@@ -40,7 +40,6 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-    std::cout<<"this is dequant op ***********"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -51,16 +50,6 @@ std::cout<<"this is dequant op ***********"<<std::endl;
     std::vector<float> scale_data = {*(scale->data<float>())};
     std::vector<float> reorder_scale = {1.0f / scale_data[0]};
-    for(int i=0; i<50; i++){
-      printf("%d ", *(input_data+i));
-    }
-    printf("\n");fflush(stdout);
-    for(int i=0; i<50; i++){
-      printf("%f ", *(input_data+i)/scale_data[0]);
-    }
-    printf("\n");fflush(stdout);
-    std::cout<<"scale = "<<scale_data[0]<<std::endl;
     std::vector<primitive> pipeline;
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
......
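Note: dequantization is a single MKL-DNN reorder whose output scale is the
reciprocal of the quantization scale, i.e. y = x * (1 / scale). A minimal
sketch of the rest of the pipeline, assuming memory primitives named as in
the other kernels of this commit (src_pd, dst_pd, src_memory_p, dst_memory
are reconstructions, not the verbatim source):

    mkldnn::primitive_attr attri;
    attri.set_output_scales(0 /*one scale for the whole tensor*/,
                            reorder_scale);  // {1.0f / scale_data[0]}
    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    pipeline.push_back(reorder(*reorder_pd, *src_memory_p, dst_memory));
    stream(stream::kind::eager).submit(pipeline).wait();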
@@ -72,7 +72,6 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   void Compute(const paddle::framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
-    std::cout<<"this is pool op"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();
@@ -105,16 +104,6 @@ std::cout<<"this is pool op"<<std::endl;
     const T* input_data = input->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
-    for(int i=0; i<50; i++){
-      printf("%f ",(float) *(input_data+i));
-    }
-    printf("\n");fflush(stdout);
-    for(int i=0; i<50; i++){
-      printf("%f ", *(input_data+i)/14.4791);
-    }
-    printf("\n");fflush(stdout);
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
@@ -143,8 +132,6 @@ printf("\n");fflush(stdout);
     mkldnn::memory::data_type dt = paddle::framework::ToMKLDNNDataType(input->type());
-    std::cout<<"input type = "<<dt<<std::endl;
     auto src_md = platform::MKLDNNMemDesc(
         src_tz, dt, input_format);
@@ -203,25 +190,12 @@ std::cout<<"input type = "<<dt<<std::endl;
                           .data.format;
     }
-    printf("befor submit!!!!!!!!!!!\n");
-    for(int i=0; i<50; i++){
-      printf("%f ", *(output_data+i)/14.4791);
-    }
-    printf("\n");fflush(stdout);
     // push primitive to stream and wait until it's executed
     std::vector<mkldnn::primitive> pipeline{*(pool_p.get())};
     stream(stream::kind::eager).submit(pipeline).wait();
-    printf("after submit!!!!!!!!!!!\n");
-    for(int i=0; i<50; i++){
-      printf("%f ", *(output_data+i)/14.4791);
-    }
-    printf("\n");fflush(stdout);
     output->set_layout(DataLayout::kMKLDNN);
     output->set_format(output_format);
-    std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
   }
 private:
......
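Note: the pooling kernel is type-agnostic: it reads the element type off the
input tensor at runtime and threads it through the memory descriptor, so the
same kernel serves f32 and quantized u8/s8 tensors. Max pooling commutes
with a positive scale (max(s*x) = s*max(x)), so the pooled tensor keeps the
input's quantization scale, which is why the deleted debug code divided
input and output by the same constant. The relevant two lines from the hunk:

    mkldnn::memory::data_type dt =
        paddle::framework::ToMKLDNNDataType(input->type());  // f32, u8, or s8
    auto src_md = platform::MKLDNNMemDesc(src_tz, dt, input_format);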
@@ -37,7 +37,6 @@ class QuantOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-    std::cout<<"this is quantize op!!!!!!!!!!!!!!"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -50,32 +49,6 @@ std::cout<<"this is quantize op!!!!!!!!!!!!!!"<<std::endl;
     uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
     std::vector<T> scale_data = {*(scale->data<T>())};
-    FILE *fp = fopen("quant_input.txt","w");
-    printf("quantize check!!!!\n");
-    std::vector<int> tz = paddle::framework::vectorize2int(input->dims());
-    int count=1;
-    for(int i=0; i<tz.size(); i++){
-      count*=tz[i];
-      printf("%d ",tz[i]);
-    }
-    printf("\n");
-    int num=0;
-    for(int i=0; i<count; i++){
-      if(num==32){ fprintf(fp,"\n"); num=0;}
-      fprintf(fp,"%f ", *(input_data+i));
-      num ++;
-    }
-    fprintf(fp,"\n");
-    fclose(fp);
-    for(int i=0; i<scale_data.size(); i++){
-      printf("%f", scale_data[i]);
-    }
-    printf("\n");
-    for(int i=0; i<50; i++){
-      printf("%f ", (*(input_data+i))*scale_data[0]);
-    }
-    printf("\n");
-    fflush(stdout);
     mkldnn::primitive_attr attri;
     int mask = 0;
     attri.set_output_scales(mask, scale_data);
......
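Note: quantization mirrors dequantization: one reorder with the scale
applied on output, mapping f32 to u8 as y = saturate(round(x * scale)). A
minimal sketch of how the attri above is consumed, assuming src_md/dst_md
are built from the tensor dims as in the other kernels (a reconstruction,
not the verbatim source):

    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    mkldnn::memory src_memory(src_pd, to_void_cast<T>(input_data));
    mkldnn::memory dst_memory(dst_pd, to_void_cast<uint8_t>(output_data));
    pipeline.push_back(reorder(*reorder_pd, src_memory, dst_memory));
    stream(stream::kind::eager).submit(pipeline).wait();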
@@ -38,7 +38,6 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     //auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-    std::cout<<"this is requantize op!!!!!!!!!!"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -75,49 +74,12 @@ std::cout<<"this is requantize op!!!!!!!!!!"<<std::endl;
     auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
         new reorder::primitive_desc(src_pd, dst_pd, attri));
-    for(int i=0; i<50; i++){
-      printf("%d ", *(input_data+i));
-    }
-    printf("\n");fflush(stdout);
-    //for(int i=0; i<50; i++){
-    //  printf("%f ", *(input_data+i)/107.426);
-    //}
-    //printf("\n");fflush(stdout);
-    std::cout<<"scale = "<<scale_data[0]<<std::endl;
-    //for(int i=0; i<50; i++){
-    //  printf("%f ", *(output_data+i)/107.426);
-    //}
-    //printf("\n");fflush(stdout);
-    //  int is_sum = false;//ctx.Attr<int>("is_sum");
-    //  if(is_sum){
-    //std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
-    //  output_data = (uint8_t*)input_data;
-    //std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
-    //
-    //printf("after*************\n");
-    //for(int i=0; i<50; i++){
-    //  printf("%f ", *(output_data+i)/107.426);
-    //}
-    //printf("\n");fflush(stdout);
-    //
-    //  } else{
-      auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
-      pipeline.push_back(*reorder_p);
-      stream(stream::kind::eager).submit(pipeline).wait();
-    //  }
-    //uint8_t* output_data_2 = output->mutable_data<uint8_t>(ctx.GetPlace());
-    //for(int i=0; i<50; i++){
-    //  printf("%f ", *(output_data_2+i)/107.426);
-    //}
-    //printf("\n");fflush(stdout);
-    for(int i=0; i<50; i++){
-      printf("%d ", *(output_data+i));
-    }
-    printf("\n");fflush(stdout);
+    auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
+    pipeline.push_back(*reorder_p);
+    stream(stream::kind::eager).submit(pipeline).wait();
     output->set_layout(DataLayout::kMKLDNN);
     output->set_format(GetMKLDNNFormat(dst_memory));
-    std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
   }
 };
......
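Note: requantize moves a tensor between two INT8 quantization domains
without a float round-trip. Since x is stored as round(v * scale_in) and the
target stores round(v * scale_out), a single reorder with the ratio of the
two scales suffices. The Scale input is commented out in this revision, so
the names below are hypothetical:

    // Hypothetical scale plumbing for the reorder shown in the hunk above:
    std::vector<float> scale_data = {scale_out / scale_in};
    mkldnn::primitive_attr attri;
    attri.set_output_scales(0, scale_data);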
@@ -132,13 +132,6 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     const T* input_data = flattened_input.data<T>();
     T* output_data = flattened_output.mutable_data<T>(ctx.GetPlace());
-    printf("this is softmax!!!!!!!!!!!!!!\n");
-    for(int i=0; i<50; i++){
-      printf("%f ", (float)*(input_data+i));
-    }
-    printf("\n");fflush(stdout);
-    std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
     std::vector<int> src_tz = paddle::framework::vectorize2int(flattened_dims);
     std::vector<int> dst_tz = src_tz;
@@ -170,7 +163,7 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework:
     std::vector<primitive> pipeline{
         *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
-    std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
+
     const bool is_test = ctx.Attr<bool>("is_test");
     if (!is_test) {
       T threshold = exp(-64);
......
@@ -70,7 +70,6 @@ inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector<int>& dims,
                                           mkldnn::memory::data_type data_type,
                                           mkldnn::memory::format format) {
   mkldnn::memory::dims tz = dims;
-  std::cout<<"this is MKLDNNMemDesc"<<" data_type"<<data_type<<" format"<<format<<std::endl;
   return mkldnn::memory::desc({tz}, data_type, format);
 }
......