diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 9210cecabe7c682a7b96b072b014e1ba041ca526..09dd426be2dafdc54b733bfc87b0940813e98399 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -1056,7 +1056,7 @@ struct ResidualElementwise : public PatternBase { }; // General struct for immutable ops: -// reshape, transpose, slice, nearest-interp +// reshape, transpose, slice, shape, nearest-interp // Forward pass for no weights-op. // immutable_out is a result of the operator. struct Immutable : public PatternBase { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc index ed4d586b8bb5c3f9c5a097edfe2ded7c7e2be973..f7ee6a96dce047f3569503b6561a88a9c584270e 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc @@ -372,7 +372,7 @@ std::unordered_set ComputePropagateScalesMkldnnPass::UpdateScales( const auto op_name = op_node->Name(); if (scale_immutable_ops.count(op_name)) { std::string input_name; - if (op_name == "slice") { + if (op_name == "slice" || op_name == "shape") { input_name = op_node->Op()->Input("Input")[0]; } else { input_name = op_node->Op()->Input("X")[0]; @@ -445,6 +445,7 @@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const { "reshape2", "pool2d", "slice", + "shape", "nearest_interp", "nearest_interp_v2"}; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index 26a4478fff6835b36ca1383fdd764eee8041f7a0..7cfc3f3336d5fafcffc3a9d35669b6daadcafe6c 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -1136,6 +1136,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { QuantizeImmutable(graph, "reshape2", "X"); QuantizeImmutable(graph, "transpose2", "X"); QuantizeImmutable(graph, "slice", "Input"); + QuantizeImmutable(graph, "shape", "Input"); QuantizeImmutable(graph, "nearest_interp", "X"); QuantizeImmutable(graph, "nearest_interp_v2", "X"); QuantizeElementwise(graph, "elementwise_add"); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 322aa22c6ad14e4419ce5bd54262219c5f42dd97..ec7432e83f874c6ca28091b8a372aaaaffb9c2f7 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -66,7 +66,7 @@ void SetOp(ProgramDesc* prog, type == "nearest_interp" || type == "nearest_interp_v2") { op->SetInput("X", {inputs[0]}); op->SetOutput("Out", {outputs[0]}); - } else if (type == "slice") { + } else if (type == "slice" || type == "shape") { op->SetInput("Input", {inputs[0]}); op->SetOutput("Out", {outputs[0]}); } else if (type == "dropout") { @@ -550,8 +550,12 @@ void TestImmutableOpWithManyOutputs(const std::string tested_op) { SCALE * S8_MAX); } -const std::vector immutables = { - "reshape2", "transpose2", "slice", "nearest_interp", "nearest_interp_v2"}; +const std::vector immutables = {"reshape2", + "transpose2", + "slice", + "shape", + "nearest_interp", + "nearest_interp_v2"}; class TestImmutables : public testing::TestWithParam {}; diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index bca2cde0fc2c6ee38db8e8997b109c9676e68ea9..cef7402e6c061c586a49e17df5756a6a77a1f75a 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -142,7 +142,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs( scales_[var_name] = scales_[input_var_name]; } compute_scale = false; - } else if (op->Type() == "slice") { + } else if (op->Type() == "slice" || op->Type() == "shape") { auto input_var_name = op->Input("Input")[0]; PADDLE_ENFORCE_NE(scales_.find(input_var_name), scales_.end(), diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc index d4fa78518e1498a9e91af3a80661db0c9ab33e2b..bfe6c5a94776a1c0001b167e12917fb56d24f3d4 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc @@ -45,6 +45,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() { rules_["slice"]["Input"] = ScaleAlgo::KL; rules_["slice"]["Out"] = ScaleAlgo::NONE; + rules_["shape"]["Input"] = ScaleAlgo::KL; + rules_["shape"]["Out"] = ScaleAlgo::NONE; + rules_["fc"]["Input"] = ScaleAlgo::KL; rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T; rules_["fc"]["Bias"] = ScaleAlgo::NONE; @@ -62,6 +65,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() { rules_["elementwise_mul"]["Y"] = ScaleAlgo::KL; rules_["elementwise_mul"]["Out"] = ScaleAlgo::KL; + rules_["elementwise_sub"]["X"] = ScaleAlgo::KL; + rules_["elementwise_sub"]["Y"] = ScaleAlgo::KL; + rules_["elementwise_sub"]["Out"] = ScaleAlgo::KL; + // Reshape2 does not perform calculation on the data and shapes are not // changed. Scale is calculated on input data and assign to Quantize and // Dequantize scale. diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py index 622d54343f6a0e3bf554ca6ee4b3b244ed80573a..0d17673a2d522df6be09525cf88c6e117c0450a4 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py @@ -63,8 +63,8 @@ class Quant2Int8MkldnnPass(object): self._op_ids_to_skip = _op_ids_to_skip if _op_ids_to_skip is not None else set( [-1]) self._scale_immutable_ops = [ - 'transpose2', 'reshape2', 'pool2d', 'slice', 'nearest_interp', - 'nearest_interp_v2' + 'transpose2', 'reshape2', 'pool2d', 'slice', 'shape', + 'nearest_interp', 'nearest_interp_v2' ] self._scale_ops = ['scale'] self._conv_ops = ['conv2d', 'depthwise_conv2d'] @@ -247,7 +247,7 @@ class Quant2Int8MkldnnPass(object): waiting_for_scale = set() for op in graph.all_op_nodes(): if op.name() in self._scale_immutable_ops: - if op.name() == 'slice': + if op.name() == 'slice' or op.name() == 'shape': input_name = op.input("Input")[0] else: input_name = op.input("X")[0]