From 269db0d1d16c0079a0f7039a5717b27e2c139af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Gallus?= Date: Fri, 31 Jan 2020 14:53:16 +0100 Subject: [PATCH] [DNNL] Fix accuracy in INT8 FC (#22404) * Enable quantize to reorder to nchw as well * Correct FC MKL-DNN input dim requirements to accept 3D * Improve DNNL FC format, error and 3D input handling test=develop * Improve error checking in FC test=develop * Improve PADDLE_ENFORCE messages in fc-related files * Remove data layout attribute from obligatory pass args test=develop * Fix message in fc_mkldnn_pass to be logically correct test=develop --- .../framework/ir/mkldnn/cpu_quantize_pass.cc | 3 + .../framework/ir/mkldnn/fc_mkldnn_pass.cc | 6 +- paddle/fluid/operators/fc_op.cc | 8 +- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 76 +++++++++++++------ .../operators/mkldnn/quantize_mkldnn_op.cc | 9 ++- paddle/fluid/operators/quantize_op.cc | 3 + paddle/fluid/platform/mkldnn_reuse.h | 5 +- 7 files changed, 74 insertions(+), 36 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index a0324279a3..da9a28baa1 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -66,6 +66,9 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input, std::vector({quantize_out_node->Name()})); q_desc.SetAttr("Scale", scale); q_desc.SetAttr("is_negative_input", !is_unsigned); + + q_desc.SetAttr("output_format", + Has("data_layout") ? Get("data_layout") : "NHWC"); auto quantize_op = g->CreateOpNode(&q_desc); // OpDesc will be copied. 
// update op's input diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc index 9b71e2abd7..95afc54837 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc @@ -56,14 +56,14 @@ void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { OpDesc* desc = fc->Op(); auto dims = fc->inputs[0]->Var()->GetShape(); auto dim_num = dims.size(); - bool are_dims_supported = dim_num == 2 || dim_num == 4; + bool are_dims_supported = dim_num >= 2 && dim_num <= 4; constexpr size_t height_axis = 2; constexpr size_t width_axis = 3; bool is_size_supported = dim_num == 4 ? (dims[width_axis] == 1 && dims[height_axis] == 1) : true; if (!are_dims_supported || !is_size_supported) { - VLOG(3) << "Do not enable FC MKL-DNN for dimensions different than 2 & 4"; - VLOG(3) << "Or when width and height are different than one"; + VLOG(3) << "Do not enable FC MKL-DNN for dimensions different than " + "2, 3 & 4, or when width or height is different than one."; return; } desc->SetAttr("use_mkldnn", true); diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index 38af314986..f81ed30962 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -69,11 +69,13 @@ class FCOp : public framework::OperatorWithKernel { activation_type.c_str()); } if (ctx->Attrs().Get("use_mkldnn")) { - PADDLE_ENFORCE_EQ(in_dims.size() == 2 || in_dims.size() == 4, true, - "Fully Connected input should be 2-D or 4-D tensor."); + PADDLE_ENFORCE_EQ( + in_dims.size() >= 2 && in_dims.size() <= 4, true, + platform::errors::Unimplemented( + "Fully Connected input should be 2D, 3D or 4D tensor.")); } PADDLE_ENFORCE_EQ(w_dims.size(), 2, - "Fully Connected input should be 2-D tensor."); + "Fully Connected weights should be 2-D tensor."); int in_num_col_dims = ctx->Attrs().Get("in_num_col_dims"); PADDLE_ENFORCE_GT( in_dims.size(), in_num_col_dims, diff --git 
a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index edc14add80..dcf0b996bd 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -54,6 +54,25 @@ class FCPrimitiveFactory { return; } // Otherwise, create a new one. + auto in_col_dims = ctx.Attr("in_num_col_dims"); + PADDLE_ENFORCE_LE(in_col_dims, 2, + platform::errors::Unimplemented( + "DNNL FC doesn't support in_num_col_dims parameter to " + "be higher than " + "2.")); + if (in_col_dims == 2) { + PADDLE_ENFORCE_EQ( + input->dims().size(), 3, + platform::errors::Unimplemented( + "DNNL FC only supports in_num_col_dims equal to 2 when " + "3 dim input is provided.")); + PADDLE_ENFORCE_EQ( + input->format(), MKLDNNMemoryFormat::ncw, + platform::errors::Unimplemented( + "DNNL FC only supports in_num_col_dims equal to 2 when " + "input format is equal to ncw.")); + } + // Transform weights to default MKL-DNN format weights_ = TransposeWeights(weights); // Since MKL-DNN has a lot of limitations on what the input/weights/output @@ -121,6 +140,33 @@ class FCPrimitiveFactory { } private: + // DNNL always returns 2-dimensional data block as a result of computing + // inner product. Hence the format 'nc' is always set for its output + // primitive. Therefore, function SetOutputFormat is needed to choose + // an appropriate format based on the number of input dimensions and + // format of an input tensor. + void SetOutputFormat(MKLDNNMemoryFormat in_format, Tensor* out) { + int dim_num = out->dims().size(); + // In case of 2 dims, we set the only possible format, nc + if (dim_num == 2) { + out->set_format(MKLDNNMemoryFormat::nc); + // In case of 3 dims, we generate a format that is based on number + // of output dims and the layout of input format (nchw or nhwc). 
+ } else if (dim_num == 3) { + if (in_format == MKLDNNMemoryFormat::nwc || + in_format == MKLDNNMemoryFormat::nhwc) { + out->set_format( + platform::MKLDNNFormatForSize(dim_num, MKLDNNMemoryFormat::nhwc)); + } else { + out->set_format( + platform::MKLDNNFormatForSize(dim_num, MKLDNNMemoryFormat::nchw)); + } + // In any other case we overwrite the output format with the input one. + } else { + out->set_format(in_format); + } + } + void UpdateDataPointers(const ExecutionContext& ctx, Tensor* out, const Tensor* in) { input_->set_data_handle(to_void_cast(in->data())); @@ -129,17 +175,7 @@ class FCPrimitiveFactory { // variable, update its format to what has been determined in first // call to CreateFcPrimitive method. if (out->format() == MKLDNNMemoryFormat::undef) { - MKLDNNMemoryFormat format; - auto data_type = input_->get_desc().data.data_type; - if (data_type == mkldnn_f32) - format = MKLDNNMemoryFormat::nchw; - else - format = MKLDNNMemoryFormat::nhwc; - - MKLDNNMemoryFormat selected = platform::MKLDNNFormatForSize( - framework::vectorize(out->dims()).size(), format); - - out->set_format(selected); + SetOutputFormat(in->format(), out); } } @@ -168,8 +204,8 @@ class FCPrimitiveFactory { const LoDTensor* input, const Tensor* weights, const Tensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto input_dims = framework::vectorize(input->dims()); - std::vector new_input_dims = {input_dims[0] * input_dims[1], 1, - input_dims[2]}; + std::vector new_input_dims = {input_dims[0] * input_dims[1], + input_dims[2], 1}; auto src_desc = CreateMemDescriptor(new_input_dims, input->format()); auto weight_dims = Get3DWeightDimsForDNNL(weights); @@ -187,7 +223,7 @@ class FCPrimitiveFactory { std::vector Get3DWeightDimsForDNNL(const Tensor* weights) { auto paddle_w_dims = framework::vectorize(weights->dims()); - return {paddle_w_dims[1], 1, paddle_w_dims[0]}; + return {paddle_w_dims[1], paddle_w_dims[0], 1}; } memory::desc Create3DUserWeightsDesc(const Tensor* weights) { 
@@ -405,18 +441,8 @@ class FCPrimitiveFactory { T_out* output_data = output->mutable_data(ctx.GetPlace(), buffer_size); memory dst_mem(dst_desc, engine_, to_void_cast(output_data)); + SetOutputFormat(ctx.Input("Input")->format(), output); - MKLDNNMemoryFormat format; - auto data_type = input_->get_desc().data.data_type; - if (data_type == mkldnn_f32) - format = MKLDNNMemoryFormat::nchw; - else - format = MKLDNNMemoryFormat::nhwc; - - MKLDNNMemoryFormat selected = platform::MKLDNNFormatForSize( - framework::vectorize(output->dims()).size(), format); - - output->set_format(selected); return dst_mem; } diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index be5c639829..55bd683f8f 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -60,6 +60,9 @@ class QuantOpKernel : public framework::OpKernel { reorder_p = std::static_pointer_cast(dev_ctx.GetBlob(key_prim)); if (reorder_p == nullptr) { + std::string out_layout = ctx.Attr("output_format"); + MKLDNNMemoryFormat out_format = + platform::data_format_to_memory_format(out_layout); mkldnn::primitive_attr attri; int mask = 0; attri.set_output_scales(mask, {scale_data}); @@ -72,10 +75,10 @@ class QuantOpKernel : public framework::OpKernel { std::shared_ptr dst_md; if (is_negative) { platform::SetDstMemoryQuantized(ctx, output, dst_tz, engine, - dst_md, dst_memory); + dst_md, dst_memory, out_format); } else { - platform::SetDstMemoryQuantized(ctx, output, dst_tz, engine, - dst_md, dst_memory); + platform::SetDstMemoryQuantized( + ctx, output, dst_tz, engine, dst_md, dst_memory, out_format); } auto reorder_pd = std::shared_ptr( new reorder::primitive_desc(*src_memory, *dst_memory, attri)); diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc index 69264e3a45..8924e21b46 100644 --- a/paddle/fluid/operators/quantize_op.cc +++ 
b/paddle/fluid/operators/quantize_op.cc @@ -37,6 +37,9 @@ void QuantOpMaker::Make() { "(bool, default false) Only used in mkldnn INT8 kernel") .SetDefault(false); AddAttr("Scale", "scale data").SetDefault({1.0f}); + AddAttr("output_format", + "Convert format to NHWC or NCHW during quantization.") + .SetDefault("NHWC"); AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC"); } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 27756ed011..f8ee9b9639 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1143,13 +1143,14 @@ static void SetDstMemoryQuantized( const framework::ExecutionContext& ctx, framework::Tensor* output, std::vector dst_tz, const mkldnn::engine& engine, std::shared_ptr& dst_md, // NOLINT - std::shared_ptr& dst_memory) { // NOLINT + std::shared_ptr& dst_memory, // NOLINT + MKLDNNMemoryFormat output_format) { T* output_data = output->mutable_data(ctx.GetPlace()); const size_t dst_dims = dst_tz.size(); MKLDNNMemoryFormat dst_fmt; PADDLE_ENFORCE_LE(dst_dims, 5, "Dst memory for quantization can not have dims > 5"); - dst_fmt = platform::MKLDNNFormatForSize(dst_dims, MKLDNNMemoryFormat::nhwc); + dst_fmt = platform::MKLDNNFormatForSize(dst_dims, output_format); auto tmp_dst_md = platform::MKLDNNMemDesc( {dst_tz}, paddle::framework::ToMKLDNNDataType( -- GitLab