From e4d475eabd83e7a6fa1e88c64c28747450f87d66 Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Tue, 8 Feb 2022 12:36:07 +0100 Subject: [PATCH] [Bug fix] Fixed handling of one of the cases in the quantization process (#39342) * Fix quantization next op findings * Corrections according to the review --- .../framework/ir/graph_pattern_detector.cc | 12 --- .../framework/ir/graph_pattern_detector.h | 4 - .../framework/ir/mkldnn/cpu_quantize_pass.cc | 85 ++++++++++--------- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 56 +++++++++++- 4 files changed, 99 insertions(+), 58 deletions(-) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 8c4965fc402..63559e20159 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1592,11 +1592,8 @@ PDNode *patterns::Transpose::operator()() { ->AsOutput() ->assert_is_op_output("transpose2", "Out"); - auto next_op = pattern->NewNode(next_op_repr())->assert_is_op(); - prev_op->LinksTo({transpose_in}); transpose_op->LinksFrom({transpose_in}).LinksTo({transpose_out}); - next_op->LinksFrom({transpose_out}); return transpose_out; } @@ -1613,11 +1610,8 @@ PDNode *patterns::Reshape::operator()() { ->AsOutput() ->assert_is_op_output("reshape2", "Out"); - auto next_op = pattern->NewNode(next_op_repr())->assert_is_op(); - prev_op->LinksTo({reshape_in}); reshape_op->LinksFrom({reshape_in}).LinksTo({reshape_out}); - next_op->LinksFrom({reshape_out}); return reshape_out; } @@ -1633,11 +1627,8 @@ PDNode *patterns::Slice::operator()() { ->AsOutput() ->assert_is_op_output("slice", "Out"); - auto next_op = pattern->NewNode(next_op_repr())->assert_is_op(); - prev_op->LinksTo({slice_in}); slice_op->LinksFrom({slice_in}).LinksTo({slice_out}); - next_op->LinksFrom({slice_out}); return slice_out; } @@ -1658,12 +1649,9 @@ PDNode *patterns::NearestInterp::operator()() { ->assert_is_ops_output({"nearest_interp", 
"nearest_interp_v2"}, "Out"); - auto next_op = pattern->NewNode(next_op_repr())->assert_is_op(); - prev_op->LinksTo({nearest_interp_in}); nearest_interp_op->LinksFrom({nearest_interp_in}) .LinksTo({nearest_interp_out}); - next_op->LinksFrom({nearest_interp_out}); return nearest_interp_out; } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 5b996a3ab91..79f1d63a151 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -963,7 +963,6 @@ struct Transpose : public PatternBase { PATTERN_DECL_NODE(transpose_in); PATTERN_DECL_NODE(transpose_op); PATTERN_DECL_NODE(transpose_out); - PATTERN_DECL_NODE(next_op); }; // Reshape op @@ -978,7 +977,6 @@ struct Reshape : public PatternBase { PATTERN_DECL_NODE(reshape_in); PATTERN_DECL_NODE(reshape_op); PATTERN_DECL_NODE(reshape_out); - PATTERN_DECL_NODE(next_op); }; // Slice op // Forward pass for slice. @@ -992,7 +990,6 @@ struct Slice : public PatternBase { PATTERN_DECL_NODE(slice_in); PATTERN_DECL_NODE(slice_op); PATTERN_DECL_NODE(slice_out); - PATTERN_DECL_NODE(next_op); }; // Nearest Interp op @@ -1007,7 +1004,6 @@ struct NearestInterp : public PatternBase { PATTERN_DECL_NODE(nearest_interp_in); PATTERN_DECL_NODE(nearest_interp_op); PATTERN_DECL_NODE(nearest_interp_out); - PATTERN_DECL_NODE(next_op); }; // Matmul op diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index 0f12261d3c4..5ff858d5ae0 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -274,8 +274,12 @@ bool CPUQuantizePass::IsOpDequantized(const Node* node) const { } bool CPUQuantizePass::IsOpQuantized(const Node* node) const { - return node->Op()->Type() == "quantize" || - platform::HasOpINT8DataType(node->Op()); + // return true only if all of outputs are ops and they are 
either quantize or + // have int8 data type + return all_of(node->outputs.begin(), node->outputs.end(), [](Node* output) { + return (output->IsOp() && (output->Op()->Type() == "quantize" || + platform::HasOpINT8DataType(output->Op()))); + }); } void CPUQuantizePass::QuantizeConv(Graph* graph, @@ -314,7 +318,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, conv_pattern); if (!AreScalesPresentForNodes( {conv_input, conv_filter, conv_residual_data})) { - LogCannotQuantizeOp(conv_op); + LogCannotQuantizeOp(conv_op, "No scale available for the operator"); return; } @@ -326,7 +330,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph, residual_scale, is_residual_unsigned, "Scale_in_eltwise"); } else { if (!AreScalesPresentForNodes({conv_input, conv_filter})) { - LogCannotQuantizeOp(conv_op); + LogCannotQuantizeOp(conv_op, "No scale available for the operator"); return; } } @@ -401,6 +405,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { return; } if (!fc->Op()->GetAttrIfExists("use_mkldnn")) { + LogCannotQuantizeOp(fc, "use_mkldnn attribute set to false"); return; } @@ -409,7 +414,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern); if (!AreScalesPresentForNodes({input, weights})) { - LogCannotQuantizeOp(fc); + LogCannotQuantizeOp(fc, "No scale available for the operator"); return; } @@ -471,7 +476,7 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern); if (!AreScalesPresentForNodes({pool_input, pool_output})) { - LogCannotQuantizeOp(pool_op); + LogCannotQuantizeOp(pool_op, "No scale available for the operator"); return; } @@ -514,7 +519,7 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern); if (!AreScalesPresentForNodes({concat_out})) { - LogCannotQuantizeOp(concat_op); + LogCannotQuantizeOp(concat_op, "No scale available for the operator"); return; 
} @@ -560,7 +565,7 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const { prior_box_pattern); if (!AreScalesPresentForNodes({prior_box_input})) { - LogCannotQuantizeOp(prior_box_op); + LogCannotQuantizeOp(prior_box_op, "No scale available for the operator"); return; } @@ -598,17 +603,18 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const { return; } GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern); - GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, transpose_pattern); + GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern); + GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern); // skip if prev op and next op is not quantized - if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(next_op))) { + if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(transpose_out))) { + LogCannotQuantizeOp(transpose_op, + "No other quantizable operators nearby"); return; } - GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern); - GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern); if (!AreScalesPresentForNodes({transpose_in, transpose_out})) { - LogCannotQuantizeOp(transpose_op); + LogCannotQuantizeOp(transpose_op, "No scale available for the operator"); return; } @@ -651,18 +657,17 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const { return; } GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern); - GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, reshape_pattern); + GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern); + GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern); - // skip if prev op and next op is not quantized - if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(next_op))) { + // skip if neither prev op is dequantized nor next ops are quantized + if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(reshape_out))) { + LogCannotQuantizeOp(reshape_op, "No other quantizable operators nearby"); return; } - GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, 
reshape_pattern); - GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern); - if (!AreScalesPresentForNodes({reshape_in, reshape_out})) { - LogCannotQuantizeOp(reshape_op); + LogCannotQuantizeOp(reshape_op, "No scale available for the operator"); return; } @@ -703,17 +708,17 @@ void CPUQuantizePass::QuantizeSlice(Graph* graph) const { return; } GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, slice_pattern); - GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, slice_pattern); + GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern); + GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern); // skip if prev op and next op is not quantized - if (!IsOpDequantized(prev_op) && !IsOpQuantized(next_op)) { + if (!IsOpDequantized(prev_op) && !IsOpQuantized(slice_out)) { + LogCannotQuantizeOp(slice_op, "No other quantizable operators nearby"); return; } - GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern); - GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern); if (!AreScalesPresentForNodes({slice_out})) { - LogCannotQuantizeOp(slice_op); + LogCannotQuantizeOp(slice_op, "No scale available for the operator"); return; } @@ -758,6 +763,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { // skip if prev ops are not quantized if (!IsOpDequantized(prev_op_x) || !IsOpDequantized(prev_op_y)) { + LogCannotQuantizeOp(matmul_op, "No other quantizable operators nearby"); return; } GET_IR_NODE_FROM_SUBGRAPH(matmul_in_x, matmul_in_x, matmul_pattern); @@ -765,7 +771,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(matmul_out, matmul_out, matmul_pattern); if (!AreScalesPresentForNodes({matmul_in_x, matmul_in_y})) { - LogCannotQuantizeOp(matmul_op); + LogCannotQuantizeOp(matmul_op, "No scale available for the operator"); return; } @@ -832,7 +838,8 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const { if (!AreScalesPresentForNodes( {elementwise_add_x, elementwise_add_y, 
elementwise_add_out})) { - LogCannotQuantizeOp(elementwise_add_op); + LogCannotQuantizeOp(elementwise_add_op, + "No scale available for the operator"); return; } @@ -893,7 +900,7 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const { GET_IR_NODE_FROM_SUBGRAPH(out, out, pattern); if (!AreScalesPresentForNodes({x, weight_x})) { - LogCannotQuantizeOp(op); + LogCannotQuantizeOp(op, "No scale available for the operator"); return; } @@ -950,7 +957,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { auto wx_names = gru->Op()->Input("WeightX"); if (!AreScalesPresentForNodes({x}) || !AreScalesPresentForVarNames(wx_names)) { - LogCannotQuantizeOp(gru); + LogCannotQuantizeOp(gru, "No scale available for the operator"); return; } @@ -1029,7 +1036,7 @@ void CPUQuantizePass::QuantizeFusionLSTM(Graph* graph) const { // Starting from here there maybe issues if (!AreScalesPresentForNodes({x, weight_x})) { - LogCannotQuantizeOp(op); + LogCannotQuantizeOp(op, "No scale available for the operator"); return; } @@ -1081,23 +1088,21 @@ void CPUQuantizePass::QuantizeNearestInterp(Graph* graph) const { return; } GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, nearest_interp_pattern); - GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, nearest_interp_pattern); + GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_in, nearest_interp_in, + nearest_interp_pattern); + GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_out, nearest_interp_out, + nearest_interp_pattern); // skip if prev op and next op is not quantized - if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(next_op))) { + if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(nearest_interp_out))) { LogCannotQuantizeOp(nearest_interp_op, - "There are no other quantized operators nearby, so " - "quantization is not recommended."); + "No other quantizable operators nearby"); return; } - GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_in, nearest_interp_in, - nearest_interp_pattern); - GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_out, nearest_interp_out, - 
nearest_interp_pattern); - if (!AreScalesPresentForNodes({nearest_interp_in, nearest_interp_out})) { - LogCannotQuantizeOp(nearest_interp_op); + LogCannotQuantizeOp(nearest_interp_op, + "No scale available for the operator"); return; } diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index e7c236bc489..b38f30f39df 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -36,7 +36,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetType(type); op->SetAttr("use_mkldnn", use_mkldnn); op->SetAttr("name", name); - if (type != "dropout" || type != "quantize" || type != "dequantize") { + if (type != "dropout" && type != "quantize" && type != "dequantize") { op->SetAttr("mkldnn_data_type", mkldnn_data_type); } @@ -371,7 +371,7 @@ TEST(CpuQuantizePass, fusion_lstm) { } static const std::initializer_list variable_names_immutable_ops = { - "a", "w1", "b", "c", "d"}; + "a", "w1", "b", "c", "d", "e", "f", "g"}; // a->Dequantize->b // b->Tested Op->c @@ -417,36 +417,88 @@ void TestImmutableOpBetweenNonQuantizedOp(const std::string tested_op) { SCALE * S8_MAX); } +// a->Dropout1->b +// b->TestedOp1(won't be quantized)->c +// c->Dropout2->d +// c->TestedOp2(will be quantized)->e +// e->Pool2d1(will be quantized)->f +// e->Pool2d2(will be quantized)->g +void TestImmutableOpWithManyOutputs(const std::string tested_op) { + ProgramDesc prog; + for (auto& v : variable_names_immutable_ops) { + prog.MutableBlock(0)->Var(v); + } + + SetOp(&prog, "dropout", "Dropout1", {"a"}, {"b"}, true, "float32"); + SetOp(&prog, tested_op, std::string(tested_op + "1"), {"b"}, {"c"}, true, + "int8"); + SetOp(&prog, "dropout", "Dropout2", {"c"}, {"d"}, true, "float32"); + SetOp(&prog, tested_op, std::string(tested_op + "2"), {"c"}, {"e"}, true, + "int8"); + SetOp(&prog, "pool2d", "Pool2d1", 
{"e"}, {"f"}, true, "int8"); + SetOp(&prog, "pool2d", "Pool2d2", {"e"}, {"g"}, true, "int8"); + + // 3 Quant + 3 IN + 3 DeQuant + 3 OUT + int added_nodes = 12; + std::unordered_map expected_operators = {{tested_op, 2}, + {"dropout", 2}, + {"pool2d", 2}, + {"quantize", 3}, + {"dequantize", 3}}; + MainTest(prog, variable_names_immutable_ops, expected_operators, added_nodes, + SCALE * S8_MAX); +} + TEST(CpuQuantizePass, reshape2) { TestImmutableOp("reshape2"); } TEST(CpuQuantizePass, reshape2BetweenNonQuantizedOp) { TestImmutableOpBetweenNonQuantizedOp("reshape2"); } +TEST(CpuQuantizePass, reshape2WithManyOutputs) { + TestImmutableOpWithManyOutputs("reshape2"); +} + TEST(CpuQuantizePass, transpose2) { TestImmutableOp("transpose2"); } TEST(CpuQuantizePass, transpose2BetweenNonQuantizedOp) { TestImmutableOpBetweenNonQuantizedOp("transpose2"); } +TEST(CpuQuantizePass, transpose2WithManyOutputs) { + TestImmutableOpWithManyOutputs("transpose2"); +} + TEST(CpuQuantizePass, slice) { TestImmutableOp("slice"); } TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) { TestImmutableOpBetweenNonQuantizedOp("slice"); } +TEST(CpuQuantizePass, sliceWithManyOutputs) { + TestImmutableOpWithManyOutputs("slice"); +} + TEST(CpuQuantizePass, nearestInterp) { TestImmutableOp("nearest_interp"); } TEST(CpuQuantizePass, nearestInterpBetweenNonQuantizedOp) { TestImmutableOpBetweenNonQuantizedOp("nearest_interp"); } +TEST(CpuQuantizePass, nearestInterpWithManyOutputs) { + TestImmutableOpWithManyOutputs("nearest_interp"); +} + TEST(CpuQuantizePass, nearestInterpV2) { TestImmutableOp("nearest_interp_v2"); } TEST(CpuQuantizePass, nearestInterpV2BetweenNonQuantizedOp) { TestImmutableOpBetweenNonQuantizedOp("nearest_interp_v2"); } +TEST(CpuQuantizePass, nearestInterpV2WithManyOutputs) { + TestImmutableOpWithManyOutputs("nearest_interp_v2"); +} + static const std::initializer_list variable_names_matmul = { "a", "b", "c", "d", "e", "f"}; -- GitLab