From d4372a1e4b0d85d5e252aaffb4bafec21158e1da Mon Sep 17 00:00:00 2001
From: Zuza Gawrysiak
Date: Mon, 11 Jul 2022 13:23:20 +0200
Subject: [PATCH] Quantize shape operator (#44124)

* Quantize shape operator

* Add shape op to propagate scales pass
---
 paddle/fluid/framework/ir/graph_pattern_detector.h     |  2 +-
 .../ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc  |  3 ++-
 paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc  |  1 +
 .../framework/ir/mkldnn/cpu_quantize_pass_tester.cc    | 10 +++++++---
 paddle/fluid/inference/api/mkldnn_quantizer.cc         |  2 +-
 paddle/fluid/inference/api/mkldnn_quantizer_config.cc  |  7 +++++++
 .../slim/quantization/quant2_int8_mkldnn_pass.py       |  6 +++---
 7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 9210cecabe..09dd426be2 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1056,7 +1056,7 @@ struct ResidualElementwise : public PatternBase {
 };
 
 // General struct for immutable ops:
-// reshape, transpose, slice, nearest-interp
+// reshape, transpose, slice, shape, nearest-interp
 // Forward pass for no weights-op.
 // immutable_out is a result of the operator.
 struct Immutable : public PatternBase {
diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
index ed4d586b8b..f7ee6a96dc 100644
--- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
@@ -372,7 +372,7 @@ std::unordered_set<std::string> ComputePropagateScalesMkldnnPass::UpdateScales(
     const auto op_name = op_node->Name();
     if (scale_immutable_ops.count(op_name)) {
       std::string input_name;
-      if (op_name == "slice") {
+      if (op_name == "slice" || op_name == "shape") {
         input_name = op_node->Op()->Input("Input")[0];
       } else {
         input_name = op_node->Op()->Input("X")[0];
@@ -445,6 +445,7 @@ void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const {
                                                           "reshape2",
                                                           "pool2d",
                                                           "slice",
+                                                          "shape",
                                                           "nearest_interp",
                                                           "nearest_interp_v2"};
 
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index 26a4478fff..7cfc3f3336 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -1136,6 +1136,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeImmutable(graph, "reshape2", "X");
   QuantizeImmutable(graph, "transpose2", "X");
   QuantizeImmutable(graph, "slice", "Input");
+  QuantizeImmutable(graph, "shape", "Input");
   QuantizeImmutable(graph, "nearest_interp", "X");
   QuantizeImmutable(graph, "nearest_interp_v2", "X");
   QuantizeElementwise(graph, "elementwise_add");
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
index 322aa22c6a..ec7432e83f 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -66,7 +66,7 @@ void SetOp(ProgramDesc* prog,
              type == "nearest_interp" || type == "nearest_interp_v2") {
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
-  } else if (type == "slice") {
+  } else if (type == "slice" || type == "shape") {
     op->SetInput("Input", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
   } else if (type == "dropout") {
@@ -550,8 +550,12 @@ void TestImmutableOpWithManyOutputs(const std::string tested_op) {
             SCALE * S8_MAX);
 }
 
-const std::vector<std::string> immutables = {
-    "reshape2", "transpose2", "slice", "nearest_interp", "nearest_interp_v2"};
+const std::vector<std::string> immutables = {"reshape2",
+                                             "transpose2",
+                                             "slice",
+                                             "shape",
+                                             "nearest_interp",
+                                             "nearest_interp_v2"};
 
 class TestImmutables : public testing::TestWithParam<std::string> {};
 
diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc
index bca2cde0fc..cef7402e6c 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer.cc
+++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc
@@ -142,7 +142,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
         scales_[var_name] = scales_[input_var_name];
       }
       compute_scale = false;
-    } else if (op->Type() == "slice") {
+    } else if (op->Type() == "slice" || op->Type() == "shape") {
       auto input_var_name = op->Input("Input")[0];
       PADDLE_ENFORCE_NE(scales_.find(input_var_name),
                         scales_.end(),
diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
index d4fa78518e..bfe6c5a947 100644
--- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
+++ b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc
@@ -45,6 +45,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
   rules_["slice"]["Input"] = ScaleAlgo::KL;
   rules_["slice"]["Out"] = ScaleAlgo::NONE;
 
+  rules_["shape"]["Input"] = ScaleAlgo::KL;
+  rules_["shape"]["Out"] = ScaleAlgo::NONE;
+
   rules_["fc"]["Input"] = ScaleAlgo::KL;
   rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T;
   rules_["fc"]["Bias"] = ScaleAlgo::NONE;
@@ -62,6 +65,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
   rules_["elementwise_mul"]["Y"] = ScaleAlgo::KL;
   rules_["elementwise_mul"]["Out"] = ScaleAlgo::KL;
 
+  rules_["elementwise_sub"]["X"] = ScaleAlgo::KL;
+  rules_["elementwise_sub"]["Y"] = ScaleAlgo::KL;
+  rules_["elementwise_sub"]["Out"] = ScaleAlgo::KL;
+
   // Reshape2 does not perform calculation on the data and shapes are not
   // changed. Scale is calculated on input data and assign to Quantize and
   // Dequantize scale.
diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
index 622d54343f..0d17673a2d 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
@@ -63,8 +63,8 @@ class Quant2Int8MkldnnPass(object):
         self._op_ids_to_skip = _op_ids_to_skip if _op_ids_to_skip is not None else set(
             [-1])
         self._scale_immutable_ops = [
-            'transpose2', 'reshape2', 'pool2d', 'slice', 'nearest_interp',
-            'nearest_interp_v2'
+            'transpose2', 'reshape2', 'pool2d', 'slice', 'shape',
+            'nearest_interp', 'nearest_interp_v2'
         ]
         self._scale_ops = ['scale']
         self._conv_ops = ['conv2d', 'depthwise_conv2d']
@@ -247,7 +247,7 @@ class Quant2Int8MkldnnPass(object):
        waiting_for_scale = set()
         for op in graph.all_op_nodes():
             if op.name() in self._scale_immutable_ops:
-                if op.name() == 'slice':
+                if op.name() == 'slice' or op.name() == 'shape':
                     input_name = op.input("Input")[0]
                 else:
                     input_name = op.input("X")[0]
-- 
GitLab
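
For context, the rule this patch extends to the shape operator can be summed up as: scale-immutable ops do not change the numeric range of their data, so the output variable reuses the input variable's quantization scale, and "slice"/"shape" expose that input under the "Input" slot rather than "X". Below is a minimal standalone sketch of that rule; the OpDesc and PropagateScale names are hypothetical stand-ins, not Paddle's actual pass API.

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>

    // Hypothetical, self-contained stand-ins for the pass's data structures.
    struct OpDesc {
      std::string type;
      std::map<std::string, std::string> inputs;  // input slot -> variable name
      std::string output;                         // variable name of "Out"
    };

    // Copy the scale of the op's data input onto its output, mirroring the
    // handling this patch adds for the "shape" operator.
    void PropagateScale(const OpDesc& op, std::map<std::string, float>* scales) {
      static const std::set<std::string> scale_immutable_ops = {
          "transpose2", "reshape2", "pool2d",         "slice",
          "shape",      "nearest_interp", "nearest_interp_v2"};
      if (scale_immutable_ops.count(op.type) == 0) return;

      // "slice" and "shape" take their tensor via "Input"; the rest use "X".
      const std::string slot =
          (op.type == "slice" || op.type == "shape") ? "Input" : "X";
      const std::string& in_var = op.inputs.at(slot);
      (*scales)[op.output] = scales->at(in_var);  // output reuses input scale
    }

    int main() {
      std::map<std::string, float> scales = {{"x", 0.043f}};
      OpDesc shape_op{"shape", {{"Input", "x"}}, "x_shape"};
      PropagateScale(shape_op, &scales);
      std::cout << scales["x_shape"] << std::endl;  // prints 0.043
    }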