diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
index 6be4ce566e01e9bcb89a38cbdc2bbd11551a065e..0644cf9bb6575462d2d8362713a4720d2684bf8d 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
@@ -39,11 +39,8 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
       if (op->GetAttrIfExists<bool>("use_quantizer")) {
        op->SetAttr("mkldnn_data_type", std::string("int8"));
      }
-      if (op_types_list.empty()) {
-        op->SetAttr("mkldnn_data_type", std::string("int8"));
-        op->SetAttr("use_quantizer", true);
-      } else if (std::find(op_types_list.begin(), op_types_list.end(),
-                           op->Type()) != op_types_list.end()) {
+      if (std::find(op_types_list.begin(), op_types_list.end(), op->Type()) !=
+          op_types_list.end()) {
        op->SetAttr("mkldnn_data_type", std::string("int8"));
        op->SetAttr("use_quantizer", true);
      }
@@ -61,7 +58,10 @@ REGISTER_PASS(cpu_quantize_placement_pass,
    // a vector of operator type names to be quantized ("conv2d" etc.)
    // the second param is the default value for this vector
    .DefaultPassAttr("quantize_enabled_op_types",
-                     new std::unordered_set<std::string>())
+                     new std::unordered_set<std::string>(
+                         {"concat", "conv2d", "elementwise_add", "fc", "matmul",
+                          "pool2d", "prior_box", "relu", "reshape2",
+                          "transpose2"}))
    // a vector of operator ids that are to be excluded from quantization
    // the second param is the default value for this vector
    .DefaultPassAttr("quantize_excluded_op_ids", new std::unordered_set<int>());
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
index 95e321e5b7190499f98c9df3dbef217310abcfcd..6977a9495853f9aa9a0680cafc51a170b848bb37 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc
@@ -130,9 +130,9 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
   MainTest({"conv2d"}, {4}, 1);
 }
 
-TEST(QuantizerPlacementPass, excluded_none) {
-  // all operators quantized
-  MainTest({}, {}, 6);
+TEST(QuantizerPlacementPass, empty_list) {
+  // no operator quantized
+  MainTest({}, {}, 0);
 }
 
 TEST(QuantizerPlacementPass, default_attr_value) {
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index 17e1e1958346155af32cf75b5e9fc25cdbdd91eb..7d99bb7d2b7a7049c67788df4c507afc14880815 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -72,7 +72,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8,
   return dst_dt;
 }
 
-template <typename T>
+template <typename T, typename K, typename T_out>
 class ConvMKLDNNHandlerT
     : public platform::MKLDNNHandlerT<T, mkldnn::convolution_forward> {
  public:
@@ -227,7 +227,7 @@ class ConvMKLDNNHandlerT
         platform::MKLDNNMemDesc(weights_tz, platform::MKLDNNGetDataType<T>(),
                                 MKLDNNMemoryFormat::any);
     const auto dst_md = platform::MKLDNNMemDesc(
-        dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+        dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
 
     const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference
                                        : mkldnn::prop_kind::forward_training;
@@ -313,29 +313,29 @@ class ConvMKLDNNHandlerT
     if (is_test && weights_mem_p) {
       return weights_mem_p;
     } else {
-      const T* filter_data = filter->data<T>();
+      const K* filter_data = filter->data<K>();
       auto weights_tz = framework::vectorize(filter->dims());
       GetWeightsTz(weights_tz, groups);
 
       auto user_src_md = platform::MKLDNNMemDesc(
-          weights_tz, platform::MKLDNNGetDataType<T>(),
+          weights_tz, platform::MKLDNNGetDataType<K>(),
           GetWeightsFormat(filter->format(), groups, is_conv3d));
 
       return this->AcquireMemoryWithReorder(
           user_src_md, this->fwd_pd_->weights_desc(),
-          to_void_cast<T>(filter_data), "@weights_mem_p", is_test);
+          to_void_cast<K>(filter_data), "@weights_mem_p", is_test);
     }
   }
 
   std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder(
       const framework::Tensor* bias, const bool is_test) {
-    const T* bias_data = bias->data<T>();
+    const K* bias_data = bias->data<K>();
     auto user_bias_md = platform::MKLDNNMemDesc(
-        framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<T>(),
+        framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<K>(),
         MKLDNNMemoryFormat::x);
     return this->AcquireMemoryWithReorder(
-        user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<T>(bias_data),
+        user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<K>(bias_data),
         "@bias_mem_p", is_test);
   }
 
@@ -358,14 +358,14 @@ class ConvMKLDNNHandlerT
     if (residual_param->format() !=
         platform::GetMKLDNNFormat(this->fwd_pd_->dst_desc())) {
       auto residual_memory_p = this->AcquireResidualMemory(residual_param);
-      dst_memory_p = this->AcquireDstMemory(output);
+      dst_memory_p = this->template AcquireDstMemory<T_out>(output);
       this->AcquireReorder(residual_memory_p, dst_memory_p, "@residual_dst");
     } else {
       // Changing ShareDataWith to TensorCopy results in performance drop
       // on ResNet architectures
       // (https://github.com/PaddlePaddle/Paddle/issues/22964)
       output->ShareDataWith(*residual_param);
-      dst_memory_p = this->AcquireDstMemory(output);
+      dst_memory_p = this->template AcquireDstMemory<T_out>(output);
     }
     return dst_memory_p;
   }
@@ -381,7 +381,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     bool is_INT8 =
         std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
     if (!is_INT8) {
-      ComputeFP32(ctx);
+      ComputeFP32<float>(ctx);
     } else {
       std::string fuse_activation = ctx.Attr<std::string>("fuse_activation");
       bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
@@ -399,6 +399,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     }
   }
 
+  template <typename T_out>
   void ComputeFP32(const paddle::framework::ExecutionContext& ctx) const {
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
@@ -414,7 +415,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
     auto* output = ctx.Output<Tensor>("Output");
 
-    ConvMKLDNNHandlerT<T> handler(
+    ConvMKLDNNHandlerT<T, K, T_out> handler(
         ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, filter, bias,
         output, ctx.InputName("Input") + ctx.InputName("Filter"));
@@ -429,7 +430,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       dst_memory_p =
           handler.AcquireDstMemoryWithResidual(output, residual_param);
     } else {
-      dst_memory_p = handler.AcquireDstMemory(output);
+      dst_memory_p = handler.template AcquireDstMemory<T_out>(output);
     }
 
     auto conv_p = handler.AcquireForwardPrimitive();
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 5d7143f56b3f394bb1a99c1b3802b7c20138dfb7..d1c5480c0f5438826e6eb5cc0de211ee1af74cf7 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -82,17 +82,21 @@ class MKLDNNHandlerT {
         fwd_pd_->src_desc(), to_void_cast<T>(input_data), "@src_mem_p");
   }
 
+  template <typename T_out = T>
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
-    T* ptr = output->mutable_data<T>(place_, fwd_pd_->dst_desc().get_size());
+    T_out* ptr =
+        output->mutable_data<T_out>(place_, fwd_pd_->dst_desc().get_size());
     return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr,
                                             "@dst_mem_p");
   }
 
+  template <typename T_out = T>
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       const framework::Tensor* output) {
-    const T* output_data = output->data<T>();
-    return this->AcquireMemoryFromPrimitive(
-        bwd_pd_->dst_desc(), to_void_cast<T>(output_data), "@bwd-dst_mem_p");
+    const T_out* output_data = output->data<T_out>();
+    return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
+                                            to_void_cast<T_out>(output_data),
+                                            "@bwd-dst_mem_p");
   }
 
   std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
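---
Note on the behavioural change in cpu_quantize_placement_pass: before this patch an empty "quantize_enabled_op_types" list meant "quantize every supported operator"; after it, an empty list quantizes nothing, and the old behaviour is supplied by the pass's new default attribute value instead (see the empty_list tester change above). Below is a minimal standalone sketch of that decision logic, not Paddle code; ShouldQuantize is a hypothetical helper introduced here for illustration only.

// Standalone illustration of the patched placement check.
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_set>

// Mirrors the patched ApplyImpl condition: an op is marked for INT8 only
// when its type appears in the enabled list; an empty list enables nothing.
static bool ShouldQuantize(const std::string& op_type,
                           const std::unordered_set<std::string>& enabled) {
  return std::find(enabled.begin(), enabled.end(), op_type) != enabled.end();
}

int main() {
  // The pass's new default value for "quantize_enabled_op_types".
  const std::unordered_set<std::string> defaults{
      "concat", "conv2d",    "elementwise_add", "fc",       "matmul",
      "pool2d", "prior_box", "relu",            "reshape2", "transpose2"};

  std::cout << ShouldQuantize("conv2d", defaults) << "\n";  // 1: quantized
  std::cout << ShouldQuantize("conv2d", {}) << "\n";        // 0: empty list
  return 0;
}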