Commit f35d8ea8 authored by xiaolil1

Fix bugs for INT8 with workarounds and debug logs

Parent 6e6944cf
......@@ -53,7 +53,7 @@ inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) {
inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
static const std::map<std::type_index, MKLDNNDataType> dict{
{std::type_index(typeid(float)), MKLDNNDataType::f32}, // NOLINT
{std::type_index(typeid(char)), MKLDNNDataType::s8}, // NOLINT
{std::type_index(typeid(signed char)), MKLDNNDataType::s8}, // NOLINT
{std::type_index(typeid(unsigned char)), MKLDNNDataType::u8},
{std::type_index(typeid(int16_t)), MKLDNNDataType::s16},
{std::type_index(typeid(int32_t)), MKLDNNDataType::s32}};
......
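
For context on the hunk above, a minimal standalone sketch (mine, not part of the patch) of why the map key had to change from char to signed char: in C++, char, signed char, and unsigned char are three distinct types, so a tensor whose element type is int8_t (an alias of signed char on typical implementations) never matched the old typeid(char) entry.

    #include <cassert>
    #include <cstdint>
    #include <typeindex>
    #include <typeinfo>

    int main() {
      // char is a distinct type from both signed char and unsigned char,
      // even on ABIs where plain char happens to be signed.
      assert(std::type_index(typeid(char)) != std::type_index(typeid(signed char)));
      assert(std::type_index(typeid(char)) != std::type_index(typeid(unsigned char)));
      // int8_t is an alias of signed char on typical implementations,
      // so it now resolves to MKLDNNDataType::s8 via the new map key.
      assert(std::type_index(typeid(int8_t)) == std::type_index(typeid(signed char)));
      return 0;
    }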
......@@ -25,8 +25,8 @@ inline const T* Tensor::data() const {
check_memory_size();
bool valid = std::is_same<T, void>::value ||
holder_->type() == std::type_index(typeid(T));
PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s",
this->holder_->type().name());
PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %d",
this->holder_->type());
return reinterpret_cast<const T*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
......
......@@ -358,7 +358,7 @@ printf("\n");fflush(stdout);
if(is_INT8){
std::cout<<"this is conv int8 op .............."<<std::endl;
//const uint8_t* input_data_int8 = input->data<uint8_t>(); //FIX ME XIAOLI
//unsigned char* a = (unsigned char*)(input_data);
//for(int i=0; i<50; i++){
// printf("%d ", *(a+i));
......@@ -373,12 +373,14 @@ for(int i=0; i<50; i++){
}
printf("\n");fflush(stdout);
std::cout<<"scale_in = "<<scale_in_data<<std::endl;
std::vector<float> scale_weights_data(count);
for(int i=0; i<count; i++){
scale_weights_data[i] =*(scale_weights->data<float>() + i);
}
float scale_out_data = *(scale_out->data<float>());
std::cout<<"scale_out = "<<scale_out_data<<std::endl;
output_shift_scale.resize(count);
for(int i=0; i<count; i++){
if(scale_weights_data[i] == 0.0)
......@@ -389,6 +391,7 @@ printf("\n");fflush(stdout);
if(fuse_residual_conn){
float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
sum_scale = scale_out_data / scale_in_eltwise_data;
std::cout<<"scale_in_eltwise_data = "<<scale_in_eltwise_data<<" scale_out_data = "<<scale_out_data<<" sum_scale = "<<sum_scale<<std::endl;
}
}
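
Restating the scale arithmetic these hunks compute, as a self-contained sketch (function names are mine, and since the hunk is truncated the exact formula is assumed from the standard INT8 recipe): the s32 convolution accumulator is rescaled by scale_out / (scale_in * scale_weight), per output channel when the weight scales are per-channel, and with residual (sum) fusion the data already in the output buffer is rescaled by scale_out / scale_in_eltwise.

    #include <vector>

    // Hypothetical helper mirroring the logic above.
    std::vector<float> ComputeOutputShiftScale(
        float scale_in, const std::vector<float>& scale_weights,
        float scale_out) {
      std::vector<float> output_shift_scale(scale_weights.size());
      for (size_t i = 0; i < scale_weights.size(); ++i) {
        // Guard against zero weight scales, as the visible branch does.
        output_shift_scale[i] =
            (scale_weights[i] == 0.0f)
                ? scale_out
                : scale_out / (scale_in * scale_weights[i]);
      }
      return output_shift_scale;
    }

    // Residual (sum) fusion: the existing output data carries
    // scale_in_eltwise, so it is rescaled to the conv output's scale.
    float ComputeSumScale(float scale_out, float scale_in_eltwise) {
      return scale_out / scale_in_eltwise;
    }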
......@@ -398,7 +401,7 @@ printf("\n");fflush(stdout);
ctx.op().Output("Output"));
const std::string key_conv_pd = key + "@conv_pd";
std::cout<<key_conv_pd<<std::endl;
std::cout<<"current op is = "<<key_conv_pd<<std::endl;
std::vector<primitive> pipeline;
auto user_src_md = platform::MKLDNNMemDesc(
......@@ -430,11 +433,20 @@ std::cout<<key_conv_pd<<std::endl;
weights_md = platform::MKLDNNMemDesc(
weights_tz, memory::data_type::s8,
(g == 1) ? chosen_memory_format : mkldnn::memory::format::goihw);
auto dst_dt = fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) : paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char)));
if(fuse_residual_conn){
auto residual = ctx.Input<Tensor>("ResidualData");
auto residual_dt = paddle::framework::ToMKLDNNDataType(residual->type());
if(dst_dt != residual_dt)
dst_dt = residual_dt;
}
dst_md = platform::MKLDNNMemDesc(
dst_tz,
fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
paddle::framework::ToMKLDNNDataType(std::type_index(typeid(char))),
dst_tz,// memory::data_type::f32, chosen_memory_format);
dst_dt,//paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))),
chosen_memory_format);
//fuse_relu? paddle::framework::ToMKLDNNDataType(std::type_index(typeid(unsigned char))) :
//paddle::framework::ToMKLDNNDataType(std::type_index(typeid(signed char))),
//chosen_memory_format);
}
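
The branch just above encodes the destination-type rule for INT8 conv; a sketch of it in isolation (the helper name is mine, the MKL-DNN 0.x enum values are as used elsewhere in this patch): ReLU fusion makes the output non-negative, so u8 is chosen, otherwise s8, and residual (sum) fusion forces the output type to match the residual tensor because the sum post-op accumulates in place into that buffer.

    #include "mkldnn.hpp"  // MKL-DNN 0.x, as used by this code base

    // Hypothetical restatement of the dst_dt selection above.
    mkldnn::memory::data_type ChooseDstDataType(
        bool fuse_relu, bool fuse_residual_conn,
        mkldnn::memory::data_type residual_dt) {
      // ReLU output is non-negative, so u8 is safe; otherwise keep s8.
      mkldnn::memory::data_type dst_dt = fuse_relu
                                             ? mkldnn::memory::data_type::u8
                                             : mkldnn::memory::data_type::s8;
      // Sum fusion reuses the residual buffer as the destination, so the
      // destination type must match the residual's type exactly.
      if (fuse_residual_conn && dst_dt != residual_dt) dst_dt = residual_dt;
      return dst_dt;
    }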
// create a conv primitive descriptor and save it for usage in backward
......@@ -486,7 +498,7 @@ std::cout<<key_conv_pd<<std::endl;
std::shared_ptr<mkldnn::memory> weights_memory_p;// = handler.AcquireWeightsMemoryFromPrimitive(
//user_weights_memory_p, pipeline, is_test);
if(is_INT8){
int mask_reorder = is_multi_channel? 0 : ((g!= 1) ? (1<<1)+(1<<0) : 1<<0);
int mask_reorder = is_multi_channel? ((g!= 1) ? (1<<1)+(1<<0) : 1<<0) : 0;
int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
std::vector<float> scale_weights_data(count);
for(int i=0; i<count; i++){
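
The corrected mask_reorder above is the substantive bug fix in this hunk: the ternary branches had been inverted, so multi-channel scales got mask 0 and scalar scales got a per-channel mask. In MKL-DNN's output-scales convention, each set bit of the mask selects a tensor dimension that receives its own scale; a sketch of the fixed rule (the helper name is mine):

    // Assumed restatement of the fixed logic (MKL-DNN 0.x mask semantics:
    // bit i set => one scale per element along dimension i of the weights).
    int WeightsReorderMask(bool is_multi_channel, int groups) {
      if (!is_multi_channel) return 0;  // one scale for the whole tensor
      if (groups != 1) return (1 << 1) | (1 << 0);  // goihw: group + out-chan
      return 1 << 0;  // oihw: per-output-channel
    }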
......@@ -503,17 +515,48 @@ std::cout<<key_conv_pd<<std::endl;
if(is_INT8){
if (fuse_residual_conn) {
auto residual_param = ctx.Input<Tensor>("ResidualData");
//auto residual_param_data = residual_param->data<T>();
PADDLE_ENFORCE_EQ(output->dims(), residual_param->dims(),
"Output and elementwise parameter need to have the "
"same dimension sizes");
//std::cout<<"output = "<<output<<" residual_param = "<<residual_param<<std::endl;
output->ShareDataWith(*residual_param);
if(fuse_relu){
auto residual_dt = paddle::framework::ToMKLDNNDataType(residual_param->type());
if(residual_dt == mkldnn::memory::data_type::u8){
uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
//std::cout<<"after share output = "<<output<<" residual_param = "<<residual_param<<std::endl;
//float scale_in_eltwise_data = *(scale_in_eltwise->data<float>());
printf("residual is u8: this is bottom 1 data\n");
//unsigned char* f = (unsigned char*)(residual_param_data);
//for(int i=0; i<50; i++){
// printf("%f ", (float)f[i]/scale_in_eltwise_data);
//}
//printf("\n");
//printf("this is output data\n");
//unsigned char* e = (unsigned char*)(output_data);
//for(int i=0; i<50; i++){
// printf("%f ", (float)e[i]/scale_in_eltwise_data);
//}
//printf("\n");
dst_memory_p =
handler.AcquireDstMemoryFromPrimitive(to_void_cast<uint8_t>(output_data));
} else{
int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
//std::cout<<"after share output = "<<output<<" residual_param = "<<residual_param<<std::endl;
printf("residual is s8 : this is bottom 1 data\n");
//char* f = (char*)(residual_param_data);
//for(int i=0; i<50; i++){
// printf("%f ", (float)f[i]);
//}
//printf("\n");
//printf("this is output data\n");
//char* e = (char*)(output_data);
//for(int i=0; i<50; i++){
// printf("%f ", (float)e[i]);
//}
//printf("\n");
dst_memory_p =
handler.AcquireDstMemoryFromPrimitive(to_void_cast<int8_t>(output_data));
}
......@@ -563,7 +606,7 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework:
std::shared_ptr<mkldnn::memory> bias_memory_p;// =
//handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
if(is_INT8){
int mask_reorder = is_multi_channel? 0 : 1<<0;
int mask_reorder = is_multi_channel? 1<<0 : 1;
int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
std::vector<float> scale_bias_data(count);
for(int i=0; i<count; i++){
......@@ -589,7 +632,10 @@ std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework:
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(*dst_memory_p));
//if(is_INT8){
// uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace()); //work around for sum fusion
// std::cout<<"output_data = "<<output_data<<std::endl;
//}
std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<" dst fmt = "<<dst_memory_p->get_primitive_desc().desc().data.format<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<"dst dt = "<<dst_memory_p->get_primitive_desc().desc().data.data_type<<std::endl;
std::cout<<"this is conv end!!!!!!!!!!!!!!!!!!!!"<<std::endl;
}
......@@ -612,7 +658,7 @@ std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<
if (fuse_relu) {
constexpr float scale = 1.0f;
constexpr float negative_slope = 0.0f;
constexpr float placeholder = 0.0f; //beta
constexpr float placeholder = 1.0f; //beta
post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
negative_slope, placeholder);
}
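
On the placeholder change just above: in MKL-DNN 0.x, append_eltwise takes (scale, algorithm, alpha, beta), alpha carries the ReLU negative slope, and beta is not used by eltwise_relu, so flipping the placeholder from 0.0f to 1.0f should be behavior-neutral here. The usual form of the call:

    #include "mkldnn.hpp"  // MKL-DNN 0.x API, as used here

    void AppendReluPostOp(mkldnn::post_ops& post_operations) {
      const float scale = 1.0f;           // scale applied to the post-op result
      const float negative_slope = 0.0f;  // alpha: 0 gives the standard ReLU
      const float beta = 0.0f;            // ignored by eltwise_relu
      post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
                                     negative_slope, beta);
    }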
......
......@@ -49,6 +49,17 @@ std::cout<<"this is dequant op ***********"<<std::endl;
float* output_data = output->mutable_data<float>(ctx.GetPlace());
//T scale_data = *(scale->data<T>());
std::vector<float> scale_data = {*(scale->data<float>())};
std::vector<float> reorder_scale = {1.0f / scale_data[0]};
for(int i=0; i<50; i++){
printf("%d ", *(input_data+i));
}
printf("\n");fflush(stdout);
for(int i=0; i<50; i++){
printf("%f ", *(input_data+i)/scale_data[0]);
}
printf("\n");fflush(stdout);
std::cout<<"scale = "<<scale_data[0]<<std::endl;
std::vector<primitive> pipeline;
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
......@@ -58,7 +69,7 @@ std::cout<<"this is dequant op ***********"<<std::endl;
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, scale_data);
attri.set_output_scales(mask, reorder_scale);
auto src_md = platform::MKLDNNMemDesc(
{src_tz}, src_dt, src_fmt);
......@@ -75,6 +86,10 @@ std::cout<<"this is dequant op ***********"<<std::endl;
new reorder::primitive_desc(src_pd, dst_pd, attri));
auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
pipeline.push_back(*reorder_p);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(dst_memory));
}
};
......
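
The key fix in the dequantize kernel is the inverted scale: an MKL-DNN reorder multiplies by its output scale, so converting s8/u8 back to f32 needs 1/scale, not scale. The whole kernel reduces to the pattern below (a sketch with assumed setup; the primitive calls match those in the hunk):

    #include <vector>
    #include "mkldnn.hpp"  // MKL-DNN 0.x

    // Dequantize int8 -> f32 via reorder: dst = src * (1 / quant_scale).
    void DequantizeByReorder(const mkldnn::memory& src_int8,
                             mkldnn::memory& dst_f32, float quant_scale) {
      mkldnn::primitive_attr attr;
      attr.set_output_scales(0 /* mask: one scale for the whole tensor */,
                             {1.0f / quant_scale});
      auto pd = mkldnn::reorder::primitive_desc(src_int8.get_primitive_desc(),
                                                dst_f32.get_primitive_desc(),
                                                attr);
      std::vector<mkldnn::primitive> pipeline{
          mkldnn::reorder(pd, src_int8, dst_f32)};
      mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
    }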
......@@ -105,6 +105,16 @@ std::cout<<"this is pool op"<<std::endl;
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
for(int i=0; i<50; i++){
printf("%f ",(float) *(input_data+i));
}
printf("\n");fflush(stdout);
for(int i=0; i<50; i++){
printf("%f ", *(input_data+i)/14.4791);
}
printf("\n");fflush(stdout);
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
......@@ -193,12 +203,25 @@ std::cout<<"input type = "<<dt<<std::endl;
.data.format;
}
printf("befor submit!!!!!!!!!!!\n");
for(int i=0; i<50; i++){
printf("%f ", *(output_data+i)/14.4791);
}
printf("\n");fflush(stdout);
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline{*(pool_p.get())};
stream(stream::kind::eager).submit(pipeline).wait();
printf("after submit!!!!!!!!!!!\n");
for(int i=0; i<50; i++){
printf("%f ", *(output_data+i)/14.4791);
}
printf("\n");fflush(stdout);
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
}
private:
......
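
A note on the constant 14.4791 in the pooling debug prints above: it appears to be the input's quantization scale, hard-coded for this experiment, so dividing by it recovers approximate float values. Pooling itself needs no rescaling in an INT8 pipeline, because max pooling commutes with any positive uniform scale and average pooling is linear; a one-line check of that property:

    #include <algorithm>
    #include <cassert>

    int main() {
      const float s = 14.4791f;  // scale value taken from the debug prints
      const float a = 2.0f, b = 3.0f;
      // max(s*a, s*b) == s * max(a, b) for s > 0, so running max-pool on
      // quantized data and dividing by s matches the float computation.
      assert(std::max(s * a, s * b) == s * std::max(a, b));
      return 0;
    }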
......@@ -36,9 +36,9 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("Input");
auto* scale = ctx.Input<Tensor>("Scale");
//auto* scale = ctx.Input<Tensor>("Scale");
auto* output = ctx.Output<Tensor>("Output");
std::cout<<"this is requantize op!!!!!!!!!!"<<std::endl;
auto& dev_ctx =
ctx.template device_context<platform::MKLDNNDeviceContext>();
const auto& engine = dev_ctx.GetEngine();
......@@ -47,18 +47,18 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type());
mkldnn::memory::data_type dst_dt = paddle::framework::ToMKLDNNDataType(output->type());
mkldnn::memory::data_type dst_dt = mkldnn::memory::data_type::u8;//paddle::framework::ToMKLDNNDataType(output->type());
mkldnn::memory::format src_fmt = memory::format::nhwc;//input->format();
mkldnn::memory::format dst_fmt = memory::format::nhwc;//output->format();
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
//T scale_data = *(scale->data<T>());
std::vector<T> scale_data = {*(scale->data<T>())};
std::vector<float> scale_data = {0.9999999}; //{*(scale->data<float>())};
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, scale_data);
attri.set_output_scales(mask,scale_data);// scale_data);
//attri.set_int_output_round_mode(round_nearest); //FIX ME
auto src_md = platform::MKLDNNMemDesc(
......@@ -70,13 +70,54 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
auto dst_md = platform::MKLDNNMemDesc(
{dst_tz}, dst_dt, dst_fmt);
auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<T>(output_data));
auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<uint8_t>(output_data));
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(dst_pd, src_pd, attri));
auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
pipeline.push_back(*reorder_p);
new reorder::primitive_desc(src_pd, dst_pd, attri));
for(int i=0; i<50; i++){
printf("%d ", *(input_data+i));
}
printf("\n");fflush(stdout);
//for(int i=0; i<50; i++){
// printf("%f ", *(input_data+i)/107.426);
//}
//printf("\n");fflush(stdout);
std::cout<<"scale = "<<scale_data[0]<<std::endl;
//for(int i=0; i<50; i++){
// printf("%f ", *(output_data+i)/107.426);
//}
//printf("\n");fflush(stdout);
// int is_sum = false;//ctx.Attr<int>("is_sum");
// if(is_sum){
//std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
// output_data = (uint8_t*)input_data;
//std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
//
//printf("after*************\n");
//for(int i=0; i<50; i++){
// printf("%f ", *(output_data+i)/107.426);
//}
//printf("\n");fflush(stdout);
//
// } else{
auto reorder_p= std::shared_ptr<reorder>(new reorder(*reorder_pd, *src_memory_p, dst_memory));
pipeline.push_back(*reorder_p);
stream(stream::kind::eager).submit(pipeline).wait();
// }
//uint8_t* output_data_2 = output->mutable_data<uint8_t>(ctx.GetPlace());
//for(int i=0; i<50; i++){
// printf("%f ", *(output_data_2+i)/107.426);
//}
//printf("\n");fflush(stdout);
for(int i=0; i<50; i++){
printf("%d ", *(output_data+i));
}
printf("\n");fflush(stdout);
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(dst_memory));
std::cout<<"input fmt = "<<input->format()<<" output fmt = "<<output->format()<<"output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
}
};
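
Three workarounds are visible in this requantize hunk: the Scale input is bypassed with a hard-coded scale_data of 0.9999999, the destination type is pinned to u8, and, the real fix, the reorder primitive descriptor's arguments are restored to (src_pd, dst_pd) order, which had been swapped. With those in place the kernel reduces to the same reorder-with-output-scales pattern as dequantize; a sketch (names and the scale ratio are mine, the patch currently hard-codes a ratio of ~1.0):

    #include <vector>
    #include "mkldnn.hpp"  // MKL-DNN 0.x

    // Requantize s8 -> u8 via reorder: dst = saturate_u8(src * ratio), where
    // ratio = scale_out / scale_in once the Scale input is wired back up.
    void RequantizeByReorder(const mkldnn::memory& src_s8,
                             mkldnn::memory& dst_u8, float scale_in,
                             float scale_out) {
      mkldnn::primitive_attr attr;
      attr.set_output_scales(0, {scale_out / scale_in});
      auto pd = mkldnn::reorder::primitive_desc(src_s8.get_primitive_desc(),
                                                dst_u8.get_primitive_desc(),
                                                attr);
      std::vector<mkldnn::primitive> pipeline{
          mkldnn::reorder(pd, src_s8, dst_u8)};
      mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
    }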
......@@ -113,4 +154,4 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(requantize, ops::ReQuantOp, ops::ReQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace, ops::ReQuantOpKernel<float>);
REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace, ops::ReQuantOpKernel<int8_t>);
......@@ -16,6 +16,7 @@ limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/framework/data_layout_transform.h"
namespace paddle {
namespace operators {
......@@ -131,6 +132,13 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
const T* input_data = flattened_input.data<T>();
T* output_data = flattened_output.mutable_data<T>(ctx.GetPlace());
printf("this is softmax!!!!!!!!!!!!!!\n");
for(int i=0; i<50; i++){
printf("%f ", (float)*(input_data+i));
}
printf("\n");fflush(stdout);
std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
std::vector<int> src_tz = paddle::framework::vectorize2int(flattened_dims);
std::vector<int> dst_tz = src_tz;
......@@ -162,7 +170,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
std::vector<primitive> pipeline{
*(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
stream(stream::kind::eager).submit(pipeline).wait();
std::cout<<"input fmt = "<<input->format()<<" input dt = "<<paddle::framework::ToMKLDNNDataType(input->type())<<" output fmt = "<<output->format()<<" output dt = "<<paddle::framework::ToMKLDNNDataType(output->type())<<std::endl;
const bool is_test = ctx.Attr<bool>("is_test");
if (!is_test) {
T threshold = exp(-64);
......