From f836c8aa8f38eb592e93016d519ed292d9622448 Mon Sep 17 00:00:00 2001 From: Wojciech Uss Date: Sat, 28 Mar 2020 05:44:08 +0100 Subject: [PATCH] add check for scales and a message (#23119) --- .../framework/ir/mkldnn/cpu_quantize_pass.cc | 105 +++++++++--------- .../framework/ir/mkldnn/cpu_quantize_pass.h | 7 +- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 40 ++++++- .../quantization/quantization_mkldnn_pass.py | 14 ++- 4 files changed, 113 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index da9a28baa17..ba168c621f7 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -85,7 +85,7 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input, } void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, - VarQuantScale* scales, bool are_unsigned, + bool are_unsigned, std::string scale_attr_name) const { auto inputs = op->inputs; auto output = op->outputs[0]; @@ -99,7 +99,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, std::vector quantize_out_nodes(inputs.size()); std::vector quantize_out_node_names(inputs.size()); - double scale_out = (*scales)[output->Name()].second.data()[0]; + double scale_out = GetScaleValueForNode(output); unsigned max = are_unsigned ? 
U8_MAX : S8_MAX; float scale = scale_out * max; @@ -169,6 +169,27 @@ void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output, if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale); } +std::pair CPUQuantizePass::GetScaleDataForNode( + const Node* node) const { + auto& scales = Get("quant_var_scales"); + PADDLE_ENFORCE_EQ( + scales.count(node->Name()), 1, + platform::errors::InvalidArgument( + "Quantization scale for the variable %s is missing.", node->Name())); + return scales[node->Name()]; +} + +LoDTensor CPUQuantizePass::GetScaleTensorForNode(const Node* node) const { + return GetScaleDataForNode(node).second; +} + +double CPUQuantizePass::GetScaleValueForNode(const Node* node, + bool* is_unsigned) const { + auto scale_data = GetScaleDataForNode(node); + if (is_unsigned != nullptr) *is_unsigned = scale_data.first; + return scale_data.second.data()[0]; +} + void CPUQuantizePass::QuantizeConv(Graph* graph, bool with_residual_data) const { GraphPatternDetector gpd; @@ -190,15 +211,12 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - - auto input_scale = scales[conv_input->Name()].second.data()[0]; - bool is_input_unsigned = scales[conv_input->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = GetScaleValueForNode(conv_input, &is_input_unsigned); QuantizeInput(g, conv_op, conv_input, "Input", input_scale, is_input_unsigned, "Scale_in"); - auto filter_scale_tensor = scales[conv_filter->Name()].second; + auto filter_scale_tensor = GetScaleTensorForNode(conv_filter); EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data(), filter_scale_tensor.numel(), 1}; eigen_tensor *= static_cast(S8_MAX); @@ -211,16 +229,16 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, if 
(with_residual_data) { GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data, conv_pattern); + bool is_residual_unsigned{false}; auto residual_scale = - scales[conv_residual_data->Name()].second.data()[0]; - bool is_residual_unsigned = scales[conv_residual_data->Name()].first; + GetScaleValueForNode(conv_residual_data, &is_residual_unsigned); QuantizeInput(g, conv_op, conv_residual_data, "ResidualData", residual_scale, is_residual_unsigned, "Scale_in_eltwise"); } - auto output_scale = scales[conv_output->Name()].second.data()[0]; - bool is_output_unsigned = scales[conv_output->Name()].first; + bool is_output_unsigned{false}; + auto output_scale = GetScaleValueForNode(conv_output, &is_output_unsigned); DequantizeOutput(g, conv_op, conv_output, "Output", output_scale, is_output_unsigned, "Scale_out"); @@ -270,15 +288,12 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - - auto input_scale = scales[input->Name()].second.data()[0]; - bool is_input_unsigned = scales[input->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = GetScaleValueForNode(input, &is_input_unsigned); QuantizeInput(g, fc, input, "Input", input_scale, is_input_unsigned, "Scale_in"); - auto weight_scale_tensor = scales[weights->Name()].second; + auto weight_scale_tensor = GetScaleTensorForNode(weights); EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data(), weight_scale_tensor.numel(), 1}; eigen_tensor *= static_cast(S8_MAX); @@ -288,8 +303,8 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { fc->Op()->SetAttr("Scale_weights", filter_scale); - auto output_scale = scales[output->Name()].second.data()[0]; - bool is_output_unsigned = scales[output->Name()].first; + bool is_output_unsigned{false}; + auto output_scale = 
GetScaleValueForNode(output, &is_output_unsigned); DequantizeOutput(g, fc, output, "Out", output_scale, is_output_unsigned, "Scale_out"); @@ -323,15 +338,12 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern); GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - - auto input_scale = scales[pool_input->Name()].second.data()[0]; - bool is_input_unsigned = scales[pool_input->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = GetScaleValueForNode(pool_input, &is_input_unsigned); QuantizeInput(g, pool_op, pool_input, "X", input_scale, is_input_unsigned); - auto output_scale = scales[pool_output->Name()].second.data()[0]; - bool is_output_unsigned = scales[pool_output->Name()].first; + bool is_output_unsigned{false}; + auto output_scale = GetScaleValueForNode(pool_output, &is_output_unsigned); DequantizeOutput(g, pool_op, pool_output, "Out", output_scale, is_output_unsigned); @@ -362,15 +374,13 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - // if all inputs were unsigned, then the output was set to unsigned // during the scale calculation step - bool are_all_inputs_unsigned = scales[concat_out->Name()].first; - QuantizeInputs(g, concat_op, "X", &scales, are_all_inputs_unsigned); + bool are_all_inputs_unsigned{false}; + auto output_scale = + GetScaleValueForNode(concat_out, &are_all_inputs_unsigned); - auto output_scale = scales[concat_out->Name()].second.data()[0]; + QuantizeInputs(g, concat_op, "X", are_all_inputs_unsigned); DequantizeOutput(g, concat_op, concat_out, "Out", output_scale, are_all_inputs_unsigned); @@ -403,11 +413,9 @@ void 
CPUQuantizePass::QuantizePriorBox(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input, prior_box_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - - auto input_scale = scales[prior_box_input->Name()].second.data()[0]; - bool is_input_unsigned = scales[prior_box_input->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = + GetScaleValueForNode(prior_box_input, &is_input_unsigned); QuantizeInput(g, prior_box_op, prior_box_input, "Input", input_scale, is_input_unsigned); @@ -451,15 +459,14 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern); GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - auto input_scale = scales[transpose_in->Name()].second.data()[0]; - bool is_input_unsigned = scales[transpose_in->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = GetScaleValueForNode(transpose_in, &is_input_unsigned); QuantizeInput(g, transpose_op, transpose_in, "X", input_scale, is_input_unsigned); - auto output_scale = scales[transpose_out->Name()].second.data()[0]; - bool is_output_unsigned = scales[transpose_out->Name()].first; + bool is_output_unsigned{false}; + auto output_scale = + GetScaleValueForNode(transpose_out, &is_output_unsigned); DequantizeOutput(g, transpose_op, transpose_out, "Out", output_scale, is_output_unsigned); @@ -504,15 +511,13 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern); GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern); - // get scales calculated after warmup, they scale variables to MAX=1.0 - auto scales = Get("quant_var_scales"); - auto input_scale = 
scales[reshape_in->Name()].second.data()[0]; - bool is_input_unsigned = scales[reshape_in->Name()].first; + bool is_input_unsigned{false}; + auto input_scale = GetScaleValueForNode(reshape_in, &is_input_unsigned); QuantizeInput(g, reshape_op, reshape_in, "X", input_scale, is_input_unsigned); - auto output_scale = scales[reshape_out->Name()].second.data()[0]; - bool is_output_unsigned = scales[reshape_out->Name()].first; + bool is_output_unsigned{false}; + auto output_scale = GetScaleValueForNode(reshape_out, &is_output_unsigned); DequantizeOutput(g, reshape_op, reshape_out, "Out", output_scale, is_output_unsigned); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h index 8ab3b4e378c..66a48e46e9e 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h @@ -64,7 +64,7 @@ class CPUQuantizePass : public FusePassBase { // quantize all inputs of given name with the same (minimum) scale void QuantizeInputs(Graph* g, Node* op, std::string input_name, - VarQuantScale* scales, bool are_unsigned, + bool are_unsigned, std::string scale_attr_name = "") const; void DequantizeOutput(Graph* g, Node* op, Node* output, @@ -72,6 +72,11 @@ class CPUQuantizePass : public FusePassBase { bool is_unsigned, std::string scale_attr_name = "") const; + std::pair GetScaleDataForNode(const Node* node) const; + LoDTensor GetScaleTensorForNode(const Node* node) const; + double GetScaleValueForNode(const Node* node, + bool* is_unsigned = nullptr) const; + const std::string name_scope_{"quantize"}; }; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index f5bf441d10c..26e675545e8 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -86,13 +86,15 @@ void InitTensorHolder(Scope* scope, 
const paddle::platform::Place& place, void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog, const std::initializer_list<std::string> variable_names, - int* original_nodes_num, int* current_nodes_num) { + int* original_nodes_num, int* current_nodes_num, + std::string var_without_scale = "") { auto place = paddle::platform::CPUPlace(); NaiveExecutor exe{place}; Scope scope; exe.CreateVariables(prog, 0, true, &scope); auto* scales = new VarQuantScale(); for (auto& v : variable_names) { + if (v.compare(var_without_scale) == 0) continue; InitTensorHolder(&scope, place, v.c_str()); LoDTensor tensor; tensor.Resize({1}); @@ -475,6 +477,42 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) { transpose_count, reshape_count, quant_count, dequant_count, added_nodes_count, 2.0f * 127); } + +void MainTestCheckScales( + const ProgramDesc& prog, + const std::initializer_list<std::string> variable_names, + const std::string& var_without_scale) { + std::unique_ptr<ir::Graph> graph(new ir::Graph(prog)); + std::stringstream error_msg_ss; + error_msg_ss << "Quantization scale for the variable " << var_without_scale + << " is missing."; + bool caught_exception = false; + try { + int original_nodes_num, current_nodes_num; + PreparePass(&graph, prog, variable_names, &original_nodes_num, + &current_nodes_num, var_without_scale); + } catch (paddle::platform::EnforceNotMet& error) { + caught_exception = true; + std::string ex_msg = error.what(); + EXPECT_NE(ex_msg.find(error_msg_ss.str()), std::string::npos); + } + EXPECT_TRUE(caught_exception); +} + +// (a, w)->Conv->o +ProgramDesc BuildProgramDescCheckScalesConv() { + ProgramDesc prog; + SetOp(&prog, "conv2d", "Conv", {"a", "w"}, {"o"}, true, true); + return prog; +} + +// Check if an exception with a proper message is thrown when quantization scale +// is missing for a variable +TEST(CPUQuantizePass, check_scales) { + const std::initializer_list<std::string> var_names = {"a", "w", "o"}; + MainTestCheckScales(BuildProgramDescCheckScalesConv(), var_names, "a"); +} + } // 
namespace } // namespace ir diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py index 02750f9e83a..1d44db72ea2 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py @@ -423,8 +423,11 @@ class Qat2Int8MkldnnPass(object): return waiting_for_scale waiting_for_scale = _update_scales(graph) + waiting_for_scale_prev = set() - while len(waiting_for_scale) != 0: + while len(waiting_for_scale + ) != 0 and waiting_for_scale != waiting_for_scale_prev: + waiting_for_scale_prev = waiting_for_scale waiting_for_scale = _update_scales(graph) return graph @@ -547,7 +550,16 @@ class Qat2Int8MkldnnPass(object): tensor = self._scope.find_var(name).get_tensor() tensor.set(array, self._place) + def _remove_ctrl_vars(self, graph): + remove_ctr_vars = set() + for node in graph.all_var_nodes(): + if node.is_ctrl_var(): + remove_ctr_vars.add(node) + graph.safe_remove_nodes(remove_ctr_vars) + return graph + def _optimize_fp32_graph(self, graph): + graph = self._remove_ctrl_vars(graph) graph = self._apply_pass(graph, 'mkldnn_placement_pass', ['mkldnn_enabled_op_types'], [set()]) if self._is_conv_quantized(): -- GitLab