diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h
index 90bb206ec6b698bc23ad1a5c9609a25186ec6de8..763fc18bf1c79d363bd66ec74fd14891e3306f8f 100644
--- a/paddle/fluid/framework/data_layout_transform.h
+++ b/paddle/fluid/framework/data_layout_transform.h
@@ -53,7 +53,7 @@ inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) {
 inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
   static const std::map<std::type_index, MKLDNNDataType> dict{
       {std::type_index(typeid(float)), MKLDNNDataType::f32},  // NOLINT
-      {std::type_index(typeid(char)), MKLDNNDataType::s8},    // NOLINT
+      {std::type_index(typeid(signed char)), MKLDNNDataType::s8},  // NOLINT
       {std::type_index(typeid(unsigned char)), MKLDNNDataType::u8},
       {std::type_index(typeid(int16_t)), MKLDNNDataType::s16},
       {std::type_index(typeid(int32_t)), MKLDNNDataType::s32}};
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index 6d3047c95d6cf30c2a5308d4f69ded367066d78c..efdf01504b616b2349f608b9015649ad726368c3 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -25,8 +25,8 @@ inline const T* Tensor::data() const {
   check_memory_size();
   bool valid = std::is_same<T, void>::value ||
                holder_->type() == std::type_index(typeid(T));
-  PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s",
-                 this->holder_->type().name());
+  PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %d",
+                 this->holder_->type());
   return reinterpret_cast<const T*>(
       reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc
index 27a4f38e40f209e252343014ff3237369c476297..ce5b53a521f31ee702f365fde2383e691158a745 100644
--- a/paddle/fluid/operators/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/conv_mkldnn_op.cc
@@ -358,7 +358,7 @@ printf("\n");fflush(stdout);
 if(is_INT8){
 std::cout<<"this is conv int8 op .............."<<std::endl;
-    const T* input_data = input->data<T>();
+    const T* input_data = input->data<T>(); //FIX ME XIAOLI
 //unsigned char* a = (unsigned char*)(input_data);
 //for(int i=0; i<50; i++){
 //  printf("%d ", *(a+i));
@@ -373,12 +373,14 @@ for(int i=0; i<50; i++){
 }
 printf("\n");fflush(stdout);
+std::cout<<"scale_in = "<<scale_in_data<<std::endl;
     std::vector<float> scale_weights_data(count);
     for(int i=0; i<count; i++){
       scale_weights_data[i] = *(scale_weights->data<float>() + i);
     }
     float scale_out_data = *(scale_out->data<float>());
-
+std::cout<<"scale_out = "<<scale_out_data<<std::endl;
     if(fuse_residual_conn){
       float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
       sum_scale = scale_out_data / scale_in_eltwise_data;
+std::cout<<"scale_in_eltwise_data = "<<scale_in_eltwise_data<<std::endl;
     }
@@ -?,? +?,? @@
+      auto dst_dt = fuse_relu?
+          paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
+          paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char)));
+      if(fuse_residual_conn){
+        auto residual_param = ctx.Input<Tensor>("ResidualData");
+        auto residual_dt = paddle::framework::ToMKLDNNDataType(residual_param->type());
+        if(dst_dt != residual_dt)
+          dst_dt = residual_dt;
+      }
       dst_md = platform::MKLDNNMemDesc(
-          dst_tz,
-          fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
-          paddle::framework::ToMKLDNNDataType(std::type_index(typeid(char))),
+          dst_tz,// memory::data_type::f32, chosen_memory_format);
+          dst_dt,//paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))),
           chosen_memory_format);
+          //fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
+          //paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char))),
+          //chosen_memory_format);
     }
 
   // create a conv primitive descriptor and save it for usage in backward
@@ -486,7 +498,7 @@
     std::shared_ptr<mkldnn::memory> weights_memory_p;// = handler.AcquireWeightsMemoryFromPrimitive(
         //user_weights_memory_p, pipeline, is_test);
     if(is_INT8){
-      int mask_reorder = is_multi_channel? 0 : ((g!= 1) ? (1<<1)+(1<<0) : 1<<0);
+      int mask_reorder = is_multi_channel? ((g!= 1) ? (1<<1)+(1<<0) : 1<<0) : 0;
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
       std::vector<float> scale_weights_data(count);
       for(int i=0; i<count; i++){
@@ -?,? +?,? @@
+      auto residual_param = ctx.Input<Tensor>("ResidualData");
+      //auto residual_param_data = residual_param->data<T>();
       PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
                         "Output and elementwise parameter need to have the "
                         "same dimension sizes");
-
+//std::cout<<"output = "<<output<<std::endl;
       output->ShareDataWith(*residual_param);
       if(fuse_relu){
         uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
         dst_memory_p = handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
       } else{
         int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
+//std::cout<<"after share output = "<<output<<std::endl;
         dst_memory_p = handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
       }
@@ -563,7 +606,7 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "
     std::shared_ptr<mkldnn::memory> bias_memory_p;// =
         //handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
     if(is_INT8){
-      int mask_reorder = is_multi_channel? 0 : 1<<0;
+      int mask_reorder = is_multi_channel? 1<<0 : 1;
       int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
       std::vector<float> scale_bias_data(count);
       for(int i=0; i<count; i++){
diff --git a/paddle/fluid/operators/dequantize_op.cc b/paddle/fluid/operators/dequantize_op.cc
@@ -?,? +?,? @@
     const T* input_data = input->data<T>();
     float* output_data = output->mutable_data<float>(ctx.GetPlace());
     //T scale_data = *(scale->data<T>());
     std::vector<float> scale_data = {*(scale->data<float>())};
+    std::vector<float> reorder_scale = {1.0f / scale_data[0]};
+
+for(int i=0; i<50; i++){
+  printf("%d ", *(input_data+i));
+}
+printf("\n");fflush(stdout);
+for(int i=0; i<50; i++){
+  printf("%f ", *(input_data+i)/scale_data[0]);
+}
+printf("\n");fflush(stdout);
+std::cout<<"scale = "<<scale_data[0]<<std::endl;
     std::vector<primitive> pipeline;
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
@@ -58,7 +69,7 @@ std::cout<<"this is dequant op ***********"<<std::endl;
     auto reorder_p = std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
     pipeline.push_back(*reorder_p);
+    stream(stream::kind::eager).submit(pipeline).wait();
+
+    output->set_layout(DataLayout::kMKLDNN);
+    output->set_format(GetMKLDNNFormat(dst_memory));
   }
 };
diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc
index d8799287ee197dd9a2b1f3eee7c16e5f4e3ec10f..cdb748f05d73366d53a27d5c34c42423119247ef 100644
--- a/paddle/fluid/operators/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/pool_mkldnn_op.cc
@@ -105,6 +105,16 @@ std::cout<<"this is pool op"<<std::endl;
   const T* input_data = input->data<T>();
   T* output_data = output->mutable_data<T>(ctx.GetPlace());
+for(int i=0; i<50; i++){
+  printf("%f ",(float) *(input_data+i));
+}
+printf("\n");fflush(stdout);
+for(int i=0; i<50; i++){
+  printf("%f ", *(input_data+i)/14.4791);
+}
+printf("\n");fflush(stdout);
+
+
   std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
   std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
@@ -193,12 +203,25 @@ std::cout<<"input type = "
   std::vector<mkldnn::primitive> pipeline{*(pool_p.get())};
   stream(stream::kind::eager).submit(pipeline).wait();
+printf("after submit!!!!!!!!!!!\n");
+for(int i=0; i<50; i++){
+  printf("%f ", *(output_data+i)/14.4791);
+}
+printf("\n");fflush(stdout);
+
   output->set_layout(DataLayout::kMKLDNN);
   output->set_format(output_format);
+std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
diff --git a/paddle/fluid/operators/requantize_op.cc b/paddle/fluid/operators/requantize_op.cc
@@ -?,? +?,? @@
 class ReQuantOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
-    auto* scale = ctx.Input<Tensor>("Scale");
+    //auto* scale = ctx.Input<Tensor>("Scale");
     auto* output = ctx.Output<Tensor>("Output");
-
+std::cout<<"this is requantize op!!!!!!!!!!"<<std::endl;
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -47,18 +47,18 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
     mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type());
-    mkldnn::memory::data_type dst_dt = paddle::framework::ToMKLDNNDataType(output->type());
+    mkldnn::memory::data_type dst_dt = mkldnn::memory::data_type::u8;//paddle::framework::ToMKLDNNDataType(output->type());
     mkldnn::memory::format src_fmt = memory::format::nhwc;//input->format();
     mkldnn::memory::format dst_fmt = memory::format::nhwc;//output->format();
     const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
     //T scale_data = *(scale->data<T>());
-    std::vector<float> scale_data = {*(scale->data<float>())};
+    std::vector<float> scale_data = {0.9999999}; //{*(scale->data<float>())};
     mkldnn::primitive_attr attri;
     int mask = 0;
-    attri.set_output_scales(mask, scale_data);
+    attri.set_output_scales(mask,scale_data);// scale_data);
     //attri.set_int_output_round_mode(round_nearest); //FIX ME
     auto src_md = platform::MKLDNNMemDesc(
         {src_tz}, src_dt, src_fmt);
@@ -70,13 +70,54 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     auto dst_md = platform::MKLDNNMemDesc(
         {dst_tz}, dst_dt, dst_fmt);
     auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
-    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<T>(output_data));
+    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<uint8_t>(output_data));
     auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
-        new reorder::primitive_desc(dst_pd, src_pd, attri));
-    auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
-    pipeline.push_back(*reorder_p);
+        new reorder::primitive_desc(src_pd, dst_pd, attri));
+for(int i=0; i<50; i++){
+  printf("%d ", *(input_data+i));
+}
+printf("\n");fflush(stdout);
+//for(int i=0; i<50; i++){
+//  printf("%f ", *(input_data+i)/107.426);
+//}
+//printf("\n");fflush(stdout);
+std::cout<<"scale = "<<scale_data[0]<<std::endl;
+//    bool is_sum = ctx.Attr<bool>("is_sum");
+//    if(is_sum){
+//std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
+    auto reorder_p = std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
+    pipeline.push_back(*reorder_p);
+    stream(stream::kind::eager).submit(pipeline).wait();
+//    }
+//uint8_t* output_data_2 = output->mutable_data<uint8_t>(ctx.GetPlace());
+//for(int i=0; i<50; i++){
+//  printf("%f ", *(output_data_2+i)/107.426);
+//}
+//printf("\n");fflush(stdout);
+for(int i=0; i<50; i++){
+  printf("%d ", *(output_data+i));
+}
+printf("\n");fflush(stdout);
+    output->set_layout(DataLayout::kMKLDNN);
+    output->set_format(GetMKLDNNFormat(dst_memory));
+std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
   }
 };
@@ -?,? +?,? @@
 REGISTER_OPERATOR(requantize, ops::ReQuantOp, ops::ReQuantOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace, ops::ReQuantOpKernel<float>);
+REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace, ops::ReQuantOpKernel<int8_t>);
diff --git a/paddle/fluid/operators/softmax_mkldnn_op.cc b/paddle/fluid/operators/softmax_mkldnn_op.cc
index 01819f53e3ab0973f6140c5a81f18f954b6a0376..6388a4247379a5f736c881a4cee2ae9a83a3e5cf 100644
--- a/paddle/fluid/operators/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/softmax_mkldnn_op.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "mkldnn.hpp"
 #include "paddle/fluid/operators/softmax_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/framework/data_layout_transform.h"
 
 namespace paddle {
 namespace operators {
@@ -131,6 +132,13 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     const T* input_data = flattened_input.data<T>();
     T* output_data = flattened_output.mutable_data<T>(ctx.GetPlace());
+printf("this is softmax!!!!!!!!!!!!!!\n");
+for(int i=0; i<50; i++){
+  printf("%f ", (float)*(input_data+i));
+}
+printf("\n");fflush(stdout);
+
+std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
     std::vector<int> src_tz = paddle::framework::vectorize2int(flattened_dims);
     std::vector<int> dst_tz = src_tz;
@@ -162,7 +170,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     std::vector<primitive> pipeline{
         *(static_cast<mkldnn::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
-
+std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
     const bool is_test = ctx.Attr<bool>("is_test");
     if (!is_test) {
       T threshold = exp(-64);
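
Note (not part of the patch): every quantization kernel touched above -- the INT8 conv output path and the quantize/dequantize/requantize ops -- leans on the same MKL-DNN idiom: a reorder primitive whose primitive_attr carries the output scales. The standalone sketch below isolates that idiom against the MKL-DNN 0.x C++ API this branch builds with; the tensor shape, scale value, and variable names are illustrative assumptions, not code from the branch.

// quantize_reorder_sketch.cc -- minimal fp32 -> s8 quantization via reorder.
#include <cstdint>
#include <cstdio>
#include <vector>
#include "mkldnn.hpp"

int main() {
  using namespace mkldnn;
  engine eng(engine::cpu, 0);

  // One 1x1x2x2 tensor: fp32 source, s8 destination.
  memory::dims tz = {1, 1, 2, 2};
  std::vector<float> src_data = {-1.f, 0.5f, 63.5f, 127.f};
  std::vector<int8_t> dst_data(src_data.size());

  auto src_md = memory::desc(tz, memory::data_type::f32, memory::format::nchw);
  auto dst_md = memory::desc(tz, memory::data_type::s8, memory::format::nchw);
  auto src_mem = memory({src_md, eng}, src_data.data());
  auto dst_mem = memory({dst_md, eng}, dst_data.data());

  // mask = 0 applies one common scale to the whole tensor; mask = 1<<0 (or
  // (1<<1)+(1<<0) for grouped weights) selects per-channel scales, which is
  // what the mask_reorder expressions in the conv kernel above compute.
  primitive_attr attr;
  attr.set_output_scales(0 /*mask*/, {1.0f /*scale, illustrative*/});
  // The integer round mode that the "//FIX ME" comments refer to:
  attr.set_int_output_round_mode(round_mode::round_nearest);

  // Argument order is src_pd first, then dst_pd -- the requantize hunk above
  // fixes exactly this (it previously passed dst_pd, src_pd).
  auto reorder_pd = reorder::primitive_desc(src_mem.get_primitive_desc(),
                                            dst_mem.get_primitive_desc(), attr);
  std::vector<primitive> pipeline;
  pipeline.push_back(reorder(reorder_pd, src_mem, dst_mem));
  stream(stream::kind::eager).submit(pipeline).wait();

  for (size_t i = 0; i < dst_data.size(); ++i) printf("%d ", dst_data[i]);
  printf("\n");
  return 0;
}

Dequantize is the same pattern with the data types swapped (s8/u8 source, f32 destination) and the inverse scale -- hence the added reorder_scale = {1.0f / scale_data[0]} in the dequantize hunk; requantize keeps integer types on both sides and rescales between the two quantization ranges.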