提交 9a5b560f 编写于 作者: X xiaoli.liu@intel.com

enable scale in op

上级 db32d125
......@@ -143,11 +143,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
conv_p = std::static_pointer_cast<mkldnn::convolution_forward>(dev_ctx.GetBlob(prim_key));
if(conv_p == nullptr){
if(is_INT8){
CreateINT8Primitive(ctx, is_test, dev_ctx, mkldnn_engine, input, filter,
CreateINT8Primitive(ctx, is_test, dev_ctx, mkldnn_engine, input, //filter,
bias, output,
strides, paddings,
dilations, fuse_relu,
fuse_residual_conn,// input_data,
fuse_residual_conn, input_data,
filter_data, src_tz,
weights_tz, g,
dst_tz, key,
......@@ -161,11 +161,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
handler,
force_fp32_output);
}else{
CreateFP32Primitive(ctx, is_test, dev_ctx, mkldnn_engine, input, filter,
CreateFP32Primitive(ctx, is_test, dev_ctx, mkldnn_engine, input, //filter,
bias, output,
strides, paddings,
dilations, fuse_relu,
fuse_residual_conn, //input_data,
fuse_residual_conn, input_data,
filter_data, src_tz,
weights_tz, g,
dst_tz, key,
......@@ -270,11 +270,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::ExecutionContext ctx, bool is_test,
const paddle::platform::MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine& mkldnn_engine,
const paddle::framework::Tensor* input, const paddle::framework::Tensor* filter,
const paddle::framework::Tensor* input,// const paddle::framework::Tensor* filter,
const paddle::framework::Tensor* bias, paddle::framework::Tensor* output,
std::vector<int> strides, std::vector<int> paddings,
std::vector<int> dilations, bool fuse_relu,
bool fuse_residual_conn, //const T* input_data,
bool fuse_residual_conn, const T* input_data,
const float* filter_data, std::vector<int> src_tz,
std::vector<int> weights_tz, int g,
std::vector<int> dst_tz, const std::string key,
......@@ -287,7 +287,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd,
std::shared_ptr<platform::ConvMKLDNNHandler> handler) const{
const T* input_data = input->data<T>();
//const T* input_data = input->data<T>();
auto user_src_md = platform::MKLDNNMemDesc(
{src_tz}, platform::MKLDNNGetDataType<T>(), input->format());
......@@ -405,11 +405,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const paddle::framework::ExecutionContext& ctx, bool is_test,
const paddle::platform::MKLDNNDeviceContext & dev_ctx,
const mkldnn::engine & mkldnn_engine,
const paddle::framework::Tensor* input, const paddle::framework::Tensor* filter,
const paddle::framework::Tensor* input, //const paddle::framework::Tensor* filter,
const paddle::framework::Tensor* bias, paddle::framework::Tensor* output,
std::vector<int> strides, std::vector<int> paddings,
std::vector<int> dilations, bool fuse_relu,
bool fuse_residual_conn,// const T* input_data,
bool fuse_residual_conn, const T* input_data,
const float* filter_data, std::vector<int> src_tz,
std::vector<int> weights_tz, int g,
std::vector<int> dst_tz, const std::string key,
......@@ -422,14 +422,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd,
std::shared_ptr<platform::ConvMKLDNNHandler> handler,
bool force_fp32_output) const {
const T* input_data = input->data<T>();
//const T* input_data = input->data<T>();
bool is_INT8 = true;
auto* scale_in = ctx.HasInput("Scale_in") ? ctx.Input<Tensor>("Scale_in") : nullptr;
auto* scale_in_eltwise = ctx.HasInput("Scale_in_eltwise")? ctx.Input<Tensor>("Scale_in_eltwise") : nullptr;
auto* scale_weights = ctx.HasInput("Scale_weights")? ctx.Input<Tensor>("Scale_weights") : nullptr;
auto* scale_out = ctx.HasInput("Scale_out")? ctx.Input<Tensor>("Scale_out") : nullptr;
auto scale_in_data = ctx.Attr<float>("Scale_in");
auto scale_in_eltwise_data = ctx.Attr<float>("Scale_in_eltwise");
auto scale_weights_data = ctx.Attr<std::vector<float>>("Scale_weights");
auto scale_out_data = force_fp32_output? 1.0f : ctx.Attr<float>("Scale_out");
bool is_multi_channel = (scale_weights->memory_size() > 1) ? true : false;
bool is_multi_channel = scale_weights_data.size() > 1 ? true : false;
auto scale_in_key = key + "@scale_in";
auto scale_weights_key = key + "@scale_weights";
......@@ -437,34 +437,33 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto output_shift_scale_key = key + "@output_shift_scale";
auto sum_scale_key = key + "@sum_scale";
auto scale_in_eltwise_key = key + "@scale_in_eltwise";
std::vector<float> scale_in_data;
std::vector<float> scale_out_data = {1.0f};
std::vector<float> scale_weights_data;
std::vector<float> scale_in_eltwise_data;
//std::vector<float> scale_in_data;
//std::vector<float> scale_out_data = {1.0f};
//std::vector<float> scale_weights_data;
//std::vector<float> scale_in_eltwise_data;
std::vector<float> output_shift_scale;
std::vector<float> sum_scale = {1.0f};
std::vector<float> none_scale = {0};
float sum_scale = 1.0f;
int count = is_multi_channel? (g>1? weights_tz[1]*weights_tz[0] : weights_tz[0]) : 1;
scale_in_data = {*(scale_in->data<float>())};
scale_weights_data.resize(count);
#pragma omp parallel for if (count > 1)
for(int i=0; i<count; i++){
scale_weights_data[i] =*(scale_weights->data<float>() + i);
}
if(!force_fp32_output)
scale_out_data = {*(scale_out->data<float>())};
//scale_in_data = {scale_in};
//scale_weights_data.resize(count);
//#pragma omp parallel for if (count > 1)
//for(int i=0; i<count; i++){
//scale_weights_data[i] =*(scale_weights->data<float>() + i);
//}
//if(!force_fp32_output)
//scale_out_data = {*(scale_out->data<float>())};
output_shift_scale.resize(count);
#pragma omp parallel for if (count > 1)
for(int i=0; i<count; i++){
if(scale_weights_data[i] == 0.0)
output_shift_scale[i] = scale_out_data[0];
output_shift_scale[i] = scale_out_data;
else
output_shift_scale[i] = scale_out_data[0] / (scale_in_data[0] * scale_weights_data[i]);
output_shift_scale[i] = scale_out_data / (scale_in_data * scale_weights_data[i]);
}
if(fuse_residual_conn){
scale_in_eltwise_data = {*(scale_in_eltwise->data<float>())};
sum_scale[0] = scale_out_data[0] / scale_in_eltwise_data[0];
//scale_in_eltwise_data = {*(scale_in_eltwise->data<float>())};
sum_scale = scale_out_data / scale_in_eltwise_data;
}
auto user_src_md = platform::MKLDNNMemDesc(
......@@ -511,12 +510,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
strides, paddings, mkldnn_engine,
fuse_relu, fuse_residual_conn,
output_shift_scale, sum_scale[0], is_test);
output_shift_scale, sum_scale, is_test);
} else {
conv_pd =
ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
mkldnn_engine, fuse_relu, fuse_residual_conn,
output_shift_scale, sum_scale[0], is_test);
output_shift_scale, sum_scale, is_test);
}
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx.SetBlob(key_conv_pd, conv_pd);
......@@ -587,7 +586,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
scale_bias_data.resize(count);
#pragma omp parallel for if (count > 1)
for(int i=0; i<count; i++){
scale_bias_data[i] = scale_in_data[0] * scale_weights_data[i];
scale_bias_data[i] = scale_in_data * scale_weights_data[i];
}
bias_memory_p =
handler->AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline, is_test, is_INT8, scale_bias_data, mask_reorder);
......
......@@ -131,21 +131,14 @@ void Conv2DOpMaker::Make() {
"The format of output tensor is X (one-dimensional) of size equal"
"to the number of output channels. Only used with MKL-DNN.")
.AsDispensable();
AddInput("Scale_in",
"(Tensor) Scale_in to be used for int8 input data."
"Only used with INT8.")
.AsDispensable();
AddInput("Scale_in_eltwise",
"(Tensor) Scale_in_eltwise to be used for int8 eltwise input data."
"Only used with MKL-DNN.")
.AsDispensable();
AddInput("Scale_weights",
"(Tensor) Scale_weights to be used for int8 weights data."
"Only used with MKL-DNN.")
.AsDispensable();
AddInput("Scale_out",
"(Tensor) Scale_out to be used for int8 output data."
"Only used with MKL-DNN.")
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
"The format of output tensor is also NCHW.");
AddInput("ResidualData",
"(Tensor) Tensor with residual data "
"to which convolution output will be added."
"Used with fuse_residual_connection fusion.")
.AsDispensable();
AddOutput("Output",
"(Tensor) The output tensor of convolution operator. "
......@@ -193,6 +186,22 @@ void Conv2DOpMaker::Make() {
"whenever convolution output is as an input to residual "
"connection.")
.SetDefault(false);
AddAttr<float>("Scale_in",
"Scale_in to be used for int8 input data."
"Only used with INT8.")
.SetDefault(1.0f);
AddAttr<float>("Scale_out",
"Scale_out to be used for int8 output data."
"Only used with MKL-DNN.")
.SetDefault(1.0f);
AddAttr<float>("Scale_in_eltwise",
"Scale_in_eltwise to be used for int8 eltwise input data."
"Only used with MKL-DNN.")
.SetDefault(1.0f);
AddAttr<std::vector<float>>("Scale_weights",
"Scale_weights to be used for int8 weights data."
"Only used with MKL-DNN.")
.SetDefault({1.0f});
AddAttr<bool>("force_fp32_output", "(bool, default false) Force INT8 kernel output FP32, only used in mkldnn kernel")
.SetDefault(false);
AddAttr<std::string>(
......
......@@ -37,7 +37,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("Input");
auto* scale = ctx.Input<Tensor>("Scale");
auto scale_data = ctx.Attr<float>("Scale");
auto* output = ctx.Output<Tensor>("Output");
auto& dev_ctx =
ctx.template device_context<platform::MKLDNNDeviceContext>();
......@@ -45,8 +45,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
float* output_data = output->mutable_data<float>(ctx.GetPlace());
std::vector<float> scale_data = {*(scale->data<float>())};
std::vector<float> reorder_scale = {1.0f / scale_data[0]};
std::vector<float> reorder_scale = {1.0f / scale_data};
std::vector<primitive> pipeline;
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
......@@ -99,8 +98,8 @@ framework::OpKernelType DeQuantOp::GetExpectedKernelType(const framework::Execut
void DeQuantOpMaker::Make() {
AddInput("Input","input data");
AddInput("Scale","scale data");
AddOutput("Output","output data");
AddAttr<float>("Scale","scale data").SetDefault({1.0f});
AddComment(R"DOC(This op will quantize data from INT8 to FP32)DOC");
}
......
......@@ -35,7 +35,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<Tensor>("Input");
auto* scale = ctx.Input<Tensor>("Scale");
auto scale_data = ctx.Attr<float>("Scale");
auto* output = ctx.Output<Tensor>("Output");
auto& dev_ctx =
ctx.template device_context<platform::MKLDNNDeviceContext>();
......@@ -47,11 +47,9 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
std::vector<T> scale_data = {*(scale->data<T>())};
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, scale_data);
attri.set_output_scales(mask, {scale_data});
auto src_md = platform::MKLDNNMemDesc(
{src_tz}, memory::data_type::f32, input->format());
......@@ -108,11 +106,12 @@ framework::OpKernelType QuantOp::GetExpectedKernelType(const framework::Executio
void QuantOpMaker::Make() {
AddInput("Input","input data");
AddInput("Scale","scale data");
AddOutput("Output","output data");
AddAttr<bool>("is_negative_input",
"(bool, default false) Only used in mkldnn INT8 kernel")
.SetDefault(false);
AddAttr<float>("Scale","scale data")
.SetDefault({1.0f});
AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册