未验证 提交 f836c8aa 编写于 作者: W Wojciech Uss 提交者: GitHub

add check for scales and a message (#23119)

上级 8bfd62ff
...@@ -85,7 +85,7 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input, ...@@ -85,7 +85,7 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
} }
void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned, bool are_unsigned,
std::string scale_attr_name) const { std::string scale_attr_name) const {
auto inputs = op->inputs; auto inputs = op->inputs;
auto output = op->outputs[0]; auto output = op->outputs[0];
...@@ -99,7 +99,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name, ...@@ -99,7 +99,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std::vector<Node*> quantize_out_nodes(inputs.size()); std::vector<Node*> quantize_out_nodes(inputs.size());
std::vector<std::string> quantize_out_node_names(inputs.size()); std::vector<std::string> quantize_out_node_names(inputs.size());
double scale_out = (*scales)[output->Name()].second.data<double>()[0]; double scale_out = GetScaleValueForNode(output);
unsigned max = are_unsigned ? U8_MAX : S8_MAX; unsigned max = are_unsigned ? U8_MAX : S8_MAX;
float scale = scale_out * max; float scale = scale_out * max;
...@@ -169,6 +169,27 @@ void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output, ...@@ -169,6 +169,27 @@ void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale); if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
} }
// Returns the entry stored for `node` in the "quant_var_scales" pass
// attribute. Callers in this pass read the `bool` member as the "is unsigned"
// flag and the `LoDTensor` member as the scale data.
//
// The lookup is keyed by the node's name. When there is not exactly one entry
// for that name, the enforce check fails with an InvalidArgument error that
// names the variable.
std::pair<bool, LoDTensor> CPUQuantizePass::GetScaleDataForNode(
    const Node* node) const {
  const std::string& var_name = node->Name();
  auto& var_scales = Get<VarQuantScale>("quant_var_scales");
  PADDLE_ENFORCE_EQ(
      var_scales.count(var_name), 1,
      platform::errors::InvalidArgument(
          "Quantization scale for the variable %s is missing.", var_name));
  // The key is known to exist at this point, so operator[] does not insert.
  return var_scales[var_name];
}
// Returns only the tensor part of the scale data registered for `node`.
// The presence check (and the error on a missing scale) is performed by
// GetScaleDataForNode.
LoDTensor CPUQuantizePass::GetScaleTensorForNode(const Node* node) const {
  auto node_scale_data = GetScaleDataForNode(node);
  return node_scale_data.second;
}
// Returns the first element of the scale tensor registered for `node`.
//
// `is_unsigned` is an optional out-parameter: when it is not null, it receives
// the boolean flag stored alongside the scale tensor. The presence check (and
// the error on a missing scale) is performed by GetScaleDataForNode.
double CPUQuantizePass::GetScaleValueForNode(const Node* node,
                                             bool* is_unsigned) const {
  auto node_scale_data = GetScaleDataForNode(node);
  if (is_unsigned != nullptr) {
    *is_unsigned = node_scale_data.first;
  }
  const double* scale_values = node_scale_data.second.data<double>();
  return scale_values[0];
}
void CPUQuantizePass::QuantizeConv(Graph* graph, void CPUQuantizePass::QuantizeConv(Graph* graph,
bool with_residual_data) const { bool with_residual_data) const {
GraphPatternDetector gpd; GraphPatternDetector gpd;
...@@ -190,15 +211,12 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, ...@@ -190,15 +211,12 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern); GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale = GetScaleValueForNode(conv_input, &is_input_unsigned);
auto input_scale = scales[conv_input->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[conv_input->Name()].first;
QuantizeInput(g, conv_op, conv_input, "Input", input_scale, QuantizeInput(g, conv_op, conv_input, "Input", input_scale,
is_input_unsigned, "Scale_in"); is_input_unsigned, "Scale_in");
auto filter_scale_tensor = scales[conv_filter->Name()].second; auto filter_scale_tensor = GetScaleTensorForNode(conv_filter);
EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(), EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(),
filter_scale_tensor.numel(), 1}; filter_scale_tensor.numel(), 1};
eigen_tensor *= static_cast<double>(S8_MAX); eigen_tensor *= static_cast<double>(S8_MAX);
...@@ -211,16 +229,16 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, ...@@ -211,16 +229,16 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
if (with_residual_data) { if (with_residual_data) {
GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data, GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data,
conv_pattern); conv_pattern);
bool is_residual_unsigned{false};
auto residual_scale = auto residual_scale =
scales[conv_residual_data->Name()].second.data<double>()[0]; GetScaleValueForNode(conv_residual_data, &is_residual_unsigned);
bool is_residual_unsigned = scales[conv_residual_data->Name()].first;
QuantizeInput(g, conv_op, conv_residual_data, "ResidualData", QuantizeInput(g, conv_op, conv_residual_data, "ResidualData",
residual_scale, is_residual_unsigned, "Scale_in_eltwise"); residual_scale, is_residual_unsigned, "Scale_in_eltwise");
} }
auto output_scale = scales[conv_output->Name()].second.data<double>()[0]; bool is_output_unsigned{false};
bool is_output_unsigned = scales[conv_output->Name()].first; auto output_scale = GetScaleValueForNode(conv_output, &is_output_unsigned);
DequantizeOutput(g, conv_op, conv_output, "Output", output_scale, DequantizeOutput(g, conv_op, conv_output, "Output", output_scale,
is_output_unsigned, "Scale_out"); is_output_unsigned, "Scale_out");
...@@ -270,15 +288,12 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { ...@@ -270,15 +288,12 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale = GetScaleValueForNode(input, &is_input_unsigned);
auto input_scale = scales[input->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[input->Name()].first;
QuantizeInput(g, fc, input, "Input", input_scale, is_input_unsigned, QuantizeInput(g, fc, input, "Input", input_scale, is_input_unsigned,
"Scale_in"); "Scale_in");
auto weight_scale_tensor = scales[weights->Name()].second; auto weight_scale_tensor = GetScaleTensorForNode(weights);
EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(), EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(),
weight_scale_tensor.numel(), 1}; weight_scale_tensor.numel(), 1};
eigen_tensor *= static_cast<double>(S8_MAX); eigen_tensor *= static_cast<double>(S8_MAX);
...@@ -288,8 +303,8 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { ...@@ -288,8 +303,8 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
fc->Op()->SetAttr("Scale_weights", filter_scale); fc->Op()->SetAttr("Scale_weights", filter_scale);
auto output_scale = scales[output->Name()].second.data<double>()[0]; bool is_output_unsigned{false};
bool is_output_unsigned = scales[output->Name()].first; auto output_scale = GetScaleValueForNode(output, &is_output_unsigned);
DequantizeOutput(g, fc, output, "Out", output_scale, is_output_unsigned, DequantizeOutput(g, fc, output, "Out", output_scale, is_output_unsigned,
"Scale_out"); "Scale_out");
...@@ -323,15 +338,12 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const { ...@@ -323,15 +338,12 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern); GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern); GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale = GetScaleValueForNode(pool_input, &is_input_unsigned);
auto input_scale = scales[pool_input->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[pool_input->Name()].first;
QuantizeInput(g, pool_op, pool_input, "X", input_scale, is_input_unsigned); QuantizeInput(g, pool_op, pool_input, "X", input_scale, is_input_unsigned);
auto output_scale = scales[pool_output->Name()].second.data<double>()[0]; bool is_output_unsigned{false};
bool is_output_unsigned = scales[pool_output->Name()].first; auto output_scale = GetScaleValueForNode(pool_output, &is_output_unsigned);
DequantizeOutput(g, pool_op, pool_output, "Out", output_scale, DequantizeOutput(g, pool_op, pool_output, "Out", output_scale,
is_output_unsigned); is_output_unsigned);
...@@ -362,15 +374,13 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const { ...@@ -362,15 +374,13 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern); GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto scales = Get<VarQuantScale>("quant_var_scales");
// if all inputs were unsigned, then the output was set to unsigned // if all inputs were unsigned, then the output was set to unsigned
// during the scale calculation step // during the scale calculation step
bool are_all_inputs_unsigned = scales[concat_out->Name()].first; bool are_all_inputs_unsigned{false};
QuantizeInputs(g, concat_op, "X", &scales, are_all_inputs_unsigned); auto output_scale =
GetScaleValueForNode(concat_out, &are_all_inputs_unsigned);
auto output_scale = scales[concat_out->Name()].second.data<double>()[0]; QuantizeInputs(g, concat_op, "X", are_all_inputs_unsigned);
DequantizeOutput(g, concat_op, concat_out, "Out", output_scale, DequantizeOutput(g, concat_op, concat_out, "Out", output_scale,
are_all_inputs_unsigned); are_all_inputs_unsigned);
...@@ -403,11 +413,9 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const { ...@@ -403,11 +413,9 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input, GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
prior_box_pattern); prior_box_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale =
GetScaleValueForNode(prior_box_input, &is_input_unsigned);
auto input_scale = scales[prior_box_input->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[prior_box_input->Name()].first;
QuantizeInput(g, prior_box_op, prior_box_input, "Input", input_scale, QuantizeInput(g, prior_box_op, prior_box_input, "Input", input_scale,
is_input_unsigned); is_input_unsigned);
...@@ -451,15 +459,14 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { ...@@ -451,15 +459,14 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern); GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern);
GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern); GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale = GetScaleValueForNode(transpose_in, &is_input_unsigned);
auto input_scale = scales[transpose_in->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[transpose_in->Name()].first;
QuantizeInput(g, transpose_op, transpose_in, "X", input_scale, QuantizeInput(g, transpose_op, transpose_in, "X", input_scale,
is_input_unsigned); is_input_unsigned);
auto output_scale = scales[transpose_out->Name()].second.data<double>()[0]; bool is_output_unsigned{false};
bool is_output_unsigned = scales[transpose_out->Name()].first; auto output_scale =
GetScaleValueForNode(transpose_out, &is_output_unsigned);
DequantizeOutput(g, transpose_op, transpose_out, "Out", output_scale, DequantizeOutput(g, transpose_op, transpose_out, "Out", output_scale,
is_output_unsigned); is_output_unsigned);
...@@ -504,15 +511,13 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const { ...@@ -504,15 +511,13 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern); GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern);
GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern); GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern);
// get scales calculated after warmup, they scale variables to MAX=1.0 bool is_input_unsigned{false};
auto scales = Get<VarQuantScale>("quant_var_scales"); auto input_scale = GetScaleValueForNode(reshape_in, &is_input_unsigned);
auto input_scale = scales[reshape_in->Name()].second.data<double>()[0];
bool is_input_unsigned = scales[reshape_in->Name()].first;
QuantizeInput(g, reshape_op, reshape_in, "X", input_scale, QuantizeInput(g, reshape_op, reshape_in, "X", input_scale,
is_input_unsigned); is_input_unsigned);
auto output_scale = scales[reshape_out->Name()].second.data<double>()[0]; bool is_output_unsigned{false};
bool is_output_unsigned = scales[reshape_out->Name()].first; auto output_scale = GetScaleValueForNode(reshape_out, &is_output_unsigned);
DequantizeOutput(g, reshape_op, reshape_out, "Out", output_scale, DequantizeOutput(g, reshape_op, reshape_out, "Out", output_scale,
is_output_unsigned); is_output_unsigned);
......
...@@ -64,7 +64,7 @@ class CPUQuantizePass : public FusePassBase { ...@@ -64,7 +64,7 @@ class CPUQuantizePass : public FusePassBase {
// quantize all inputs of given name with the same (minimum) scale // quantize all inputs of given name with the same (minimum) scale
void QuantizeInputs(Graph* g, Node* op, std::string input_name, void QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned, bool are_unsigned,
std::string scale_attr_name = "") const; std::string scale_attr_name = "") const;
void DequantizeOutput(Graph* g, Node* op, Node* output, void DequantizeOutput(Graph* g, Node* op, Node* output,
...@@ -72,6 +72,11 @@ class CPUQuantizePass : public FusePassBase { ...@@ -72,6 +72,11 @@ class CPUQuantizePass : public FusePassBase {
bool is_unsigned, bool is_unsigned,
std::string scale_attr_name = "") const; std::string scale_attr_name = "") const;
std::pair<bool, LoDTensor> GetScaleDataForNode(const Node* node) const;
LoDTensor GetScaleTensorForNode(const Node* node) const;
double GetScaleValueForNode(const Node* node,
bool* is_unsigned = nullptr) const;
const std::string name_scope_{"quantize"}; const std::string name_scope_{"quantize"};
}; };
......
...@@ -86,13 +86,15 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -86,13 +86,15 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog, void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names, const std::initializer_list<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num) { int* original_nodes_num, int* current_nodes_num,
std::string var_without_scale = "") {
auto place = paddle::platform::CPUPlace(); auto place = paddle::platform::CPUPlace();
NaiveExecutor exe{place}; NaiveExecutor exe{place};
Scope scope; Scope scope;
exe.CreateVariables(prog, 0, true, &scope); exe.CreateVariables(prog, 0, true, &scope);
auto* scales = new VarQuantScale(); auto* scales = new VarQuantScale();
for (auto& v : variable_names) { for (auto& v : variable_names) {
if (v.compare(var_without_scale) == 0) continue;
InitTensorHolder(&scope, place, v.c_str()); InitTensorHolder(&scope, place, v.c_str());
LoDTensor tensor; LoDTensor tensor;
tensor.Resize({1}); tensor.Resize({1});
...@@ -475,6 +477,42 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) { ...@@ -475,6 +477,42 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
transpose_count, reshape_count, quant_count, dequant_count, transpose_count, reshape_count, quant_count, dequant_count,
added_nodes_count, 2.0f * 127); added_nodes_count, 2.0f * 127);
} }
void MainTestCheckScales(
const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
const std::string& var_without_scale) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
std::stringstream error_msg_ss;
error_msg_ss << "Quantization scale for the variable " << var_without_scale
<< " is missing.";
bool caught_exception = false;
try {
int original_nodes_num, current_nodes_num;
PreparePass(&graph, prog, variable_names, &original_nodes_num,
&current_nodes_num, var_without_scale);
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_NE(ex_msg.find(error_msg_ss.str()), std::string::npos);
}
EXPECT_TRUE(caught_exception);
}
// Builds a program with a single conv2d operator: (a, w)->Conv->o
ProgramDesc BuildProgramDescCheckScalesConv() {
  ProgramDesc conv_program;
  SetOp(&conv_program, "conv2d", "Conv", {"a", "w"}, {"o"}, true, true);
  return conv_program;
}
// Check if an exception with a proper message is thrown when quantization scale
// is missing for a variable.
// The test suite is named CpuQuantizePass, like the other test cases in this
// file, so that a single gtest filter (CpuQuantizePass.*) selects all of them.
TEST(CpuQuantizePass, check_scales) {
  const std::initializer_list<std::string> var_names = {"a", "w", "o"};
  // "a" is the conv input; its scale is deliberately left out.
  MainTestCheckScales(BuildProgramDescCheckScalesConv(), var_names, "a");
}
} // namespace } // namespace
} // namespace ir } // namespace ir
......
...@@ -423,8 +423,11 @@ class Qat2Int8MkldnnPass(object): ...@@ -423,8 +423,11 @@ class Qat2Int8MkldnnPass(object):
return waiting_for_scale return waiting_for_scale
waiting_for_scale = _update_scales(graph) waiting_for_scale = _update_scales(graph)
waiting_for_scale_prev = set()
while len(waiting_for_scale) != 0: while len(waiting_for_scale
) != 0 and waiting_for_scale != waiting_for_scale_prev:
waiting_for_scale_prev = waiting_for_scale
waiting_for_scale = _update_scales(graph) waiting_for_scale = _update_scales(graph)
return graph return graph
...@@ -547,7 +550,16 @@ class Qat2Int8MkldnnPass(object): ...@@ -547,7 +550,16 @@ class Qat2Int8MkldnnPass(object):
tensor = self._scope.find_var(name).get_tensor() tensor = self._scope.find_var(name).get_tensor()
tensor.set(array, self._place) tensor.set(array, self._place)
def _remove_ctrl_vars(self, graph):
remove_ctr_vars = set()
for node in graph.all_var_nodes():
if node.is_ctrl_var():
remove_ctr_vars.add(node)
graph.safe_remove_nodes(remove_ctr_vars)
return graph
def _optimize_fp32_graph(self, graph): def _optimize_fp32_graph(self, graph):
graph = self._remove_ctrl_vars(graph)
graph = self._apply_pass(graph, 'mkldnn_placement_pass', graph = self._apply_pass(graph, 'mkldnn_placement_pass',
['mkldnn_enabled_op_types'], [set()]) ['mkldnn_enabled_op_types'], [set()])
if self._is_conv_quantized(): if self._is_conv_quantized():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册