diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 838912f659ff7c57683fba3920ede9f2d9829edd..db1a10e3e31b25c51996f5b9ebbfdbed62c8aed5 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT #include <gtest/gtest.h> +#include <unordered_map> #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" @@ -23,6 +24,10 @@ namespace paddle { namespace framework { namespace ir { +static float const SCALE = 2.f; +static int const S8_MAX = 127; +static int const U8_MAX = 255; + void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool use_mkldnn, @@ -31,6 +36,9 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetType(type); op->SetAttr("use_mkldnn", use_mkldnn); op->SetAttr("name", name); + if (type != "dropout" && type != "quantize" && type != "dequantize") { + op->SetAttr("mkldnn_data_type", mkldnn_data_type); + } if (type == "conv2d") { op->SetInput("Input", {inputs[0]}); @@ -47,18 +55,15 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetAttr("fuse_residual_connection", false); } op->SetOutput("Output", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_in", 1.0f); op->SetAttr("Scale_out", 1.0f); op->SetAttr("Scale_weights", std::vector<float>{1.0f}); } else if (type == "pool2d" || type == "transpose2" || type == "reshape2") { op->SetInput("X", {inputs[0]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); }
else if (type == "slice") { op->SetInput("Input", {inputs[0]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); } else if (type == "dropout") { op->SetInput("X", {inputs[0]}); op->SetOutput("Out", {outputs[0]}); @@ -67,14 +72,12 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, if (inputs.size() > 1) op->SetInput("W", {inputs[1]}); if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_in", 1.0f); op->SetAttr("Scale_out", 1.0f); op->SetAttr("Scale_weights", std::vector{1.0f}); } else if (type == "concat") { op->SetInput("X", inputs); op->SetOutput("Out", outputs); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); } else if (type == "dequantize") { op->SetInput("Input", {inputs[0]}); op->SetOutput("Output", {outputs[0]}); @@ -83,7 +86,6 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); @@ -91,7 +93,6 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); @@ -101,7 +102,6 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("WeightX", {inputs[2]}); op->SetInput("WeightH", {inputs[3]}); op->SetOutput("Hidden", {outputs[0]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_data", 1.0f); op->SetAttr("Shift_data", 0.0f); 
op->SetAttr("Weight_scale", std::vector{1.0f}); @@ -114,7 +114,6 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetOutput("Hidden", {outputs[0]}); op->SetOutput("Cell", {outputs[1]}); - op->SetAttr("mkldnn_data_type", mkldnn_data_type); op->SetAttr("Scale_data", 1.0f); op->SetAttr("Shift_data", 0.0f); op->SetAttr("Weight_scale", std::vector{1.0f}); @@ -144,7 +143,7 @@ void PreparePass(std::unique_ptr* graph, const ProgramDesc& prog, LoDTensor tensor; tensor.Resize({1}); auto* ptr = tensor.mutable_data(place); - ptr[0] = 2.0; + ptr[0] = SCALE; (*scales)[v] = std::make_pair(v == var_signed, std::move(tensor)); } @@ -158,7 +157,57 @@ void PreparePass(std::unique_ptr* graph, const ProgramDesc& prog, *current_nodes_num = (*graph)->Nodes().size(); } -namespace { +void CheckScales(const OpDesc* op, float scale, float shift) { + std::string type = op->Type(); + std::vector scale_names; + if (type == "conv2d" || type == "fc") { + EXPECT_EQ(op->GetAttrIfExists>("Scale_weights")[0], + scale); + scale_names.push_back("Scale_in"); + scale_names.push_back("Scale_out"); + } else if (type == "matmul" || type == "elementwise_add") { + scale_names.push_back("Scale_x"); + scale_names.push_back("Scale_y"); + scale_names.push_back("Scale_out"); + } else if (type == "fusion_gru" || type == "fusion_lstm") { + EXPECT_EQ(op->GetAttrIfExists("Shift_data"), shift); + EXPECT_EQ(op->GetAttrIfExists>("Scale_weights")[0], + scale); + EXPECT_EQ(op->GetAttrIfExists("force_fp32_output"), true); + scale_names.push_back("Scale_data"); + } + + for (auto const& scale_name : scale_names) { + EXPECT_EQ(op->GetAttrIfExists(scale_name), scale); + } +} + +void MainTest(const ProgramDesc& prog, + const std::vector variable_names, + std::unordered_map expected_operators, + const int added_nodes_count, float scale = 1.f, float shift = 1.f, + std::string var_without_scale = "", std::string var_signed = "") { + std::unique_ptr graph(new ir::Graph(prog)); + int 
original_nodes_num, current_nodes_num; + PreparePass(&graph, prog, variable_names, &original_nodes_num, + ¤t_nodes_num, var_without_scale, var_signed); + std::unordered_map actual_operators; + for (auto* node : graph->Nodes()) { + if (node->IsOp()) { + auto* op = node->Op(); + if (expected_operators.count(op->Type()) > 0) { + expected_operators[op->Type()]--; + if (op->GetAttrIfExists("mkldnn_data_type") == "int8") + CheckScales(op, scale, shift); + } + } + } + for (auto const& pair : expected_operators) { + EXPECT_EQ(pair.second, 0); + } + EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); +} + static const std::initializer_list variable_names{ "a", "w1", "c", "d", "w2", "e", "f", "g", "h", "w3", "b1", "i", "j", "w4", "b2", "w5", "b3"}; @@ -199,48 +248,6 @@ ProgramDesc BuildProgramDesc(bool use_mkldnn, return prog; } -void MainTest(const ProgramDesc& prog, int conv_count, int pool_count, - int quant_count, int dequant_count, int added_nodes_count, - float scale) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names, &original_nodes_num, - ¤t_nodes_num); - - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int conv2d_nodes_count = 0; - int pool2d_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "conv2d") { - conv2d_nodes_count++; - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_in")), scale) - << "Scale_in for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale) - << "Scale_out for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(std::vector, - op->GetAttr("Scale_weights"))[0], - scale) - << "Scale_weights for node '" + op_name + "'."; - } else if (op->Type() == "pool2d") { - pool2d_nodes_count++; - } else if (op->Type() == "quantize") { - 
quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } - } - EXPECT_EQ(conv2d_nodes_count, conv_count); - EXPECT_EQ(pool2d_nodes_count, pool_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - TEST(CpuQuantizePass, quantize) { bool use_mkldnn = true; std::string mkldnn_data_type = "int8"; @@ -256,16 +263,20 @@ TEST(CpuQuantizePass, quantize) { // (d->QUANT7->IN7,w4, b2)->Conv4->DEQUANT6->OUT6->i // Insert nodes: 8 Quant + 8 IN + 7 OUT + 7 DEQUANT int added_nodes = 8 + 8 + 7 + 7; - MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 8, 7, - added_nodes, 2.0f * 127); + std::unordered_map expected_operators = { + {"conv2d", 4}, {"pool2d", 2}, {"quantize", 8}, {"dequantize", 7}}; + MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), variable_names, + expected_operators, added_nodes, SCALE * S8_MAX); } TEST(CpuQuantizePass, do_not_quantize) { bool use_mkldnn = true; std::string mkldnn_data_type = "float32"; int added_nodes = 0; - MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 0, 0, - added_nodes, 1.0f); + std::unordered_map expected_operators = { + {"conv2d", 4}, {"pool2d", 2}, {"quantize", 0}, {"dequantize", 0}}; + MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), variable_names, + expected_operators, added_nodes, 1.0f); } static const std::initializer_list variable_names_concat = { @@ -286,134 +297,16 @@ ProgramDesc BuildProgramDescConcat() { return prog; } -void MainTestConcat(const ProgramDesc& prog, int pool_count, int concat_count, - int quant_count, int dequant_count, int added_nodes_count) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_concat, &original_nodes_num, - ¤t_nodes_num); - - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int 
concat_nodes_count = 0; - int pool2d_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "concat") { - concat_nodes_count++; - } else if (op->Type() == "pool2d") { - pool2d_nodes_count++; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } - } - EXPECT_EQ(concat_nodes_count, concat_count); - EXPECT_EQ(pool2d_nodes_count, pool_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - TEST(CpuQuantizePass, concat) { // a1->Pool1->b1 // a2->Pool2->b2 // (b1->QUANT1->IN1, b2->QUANT2->IN2)->Concat->c // c->OUT1->DEQUANT1->Pool3->d - int pool_count = 3; - int concat_count = 1; - int quant_count = 2; - int dequant_count = 1; - int added_nodes_count = 6; - MainTestConcat(BuildProgramDescConcat(), pool_count, concat_count, - quant_count, dequant_count, added_nodes_count); -} - -static const std::initializer_list variable_names_transpose = { - "a", "w1", "b", "c", "w2", "d", "e", "f"}; - -// a->Conv1->b -// b->Transpose1->c -// c->Conv2->d -// d->Transpose2->e -// e->Dropout->f -ProgramDesc BuildProgramDescTranspose() { - ProgramDesc prog; - for (auto& v : variable_names_transpose) { - auto* var = prog.MutableBlock(0)->Var(v); - if (v.find("w") == 0) { - var->SetPersistable(true); - } - } - - SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, "int8"); - SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, "int8"); - SetOp(&prog, "conv2d", "Conv1", {"c", "w2"}, {"d"}, true, "int8"); - SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); - - return prog; -} - -void MainTestTranspose(const ProgramDesc& prog, int conv_count, - int transpose_count, int quant_count, int dequant_count, - int 
added_nodes_count, float scale) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_transpose, &original_nodes_num, - ¤t_nodes_num); - - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int transpose_nodes_count = 0; - int conv_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "transpose2") { - transpose_nodes_count++; - } else if (op->Type() == "conv2d") { - conv_nodes_count++; - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_in")), scale) - << "Scale_in for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale) - << "Scale_out for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(std::vector, - op->GetAttr("Scale_weights"))[0], - scale) - << "Scale_weights for node '" + op_name + "'."; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } - } - EXPECT_EQ(transpose_nodes_count, transpose_count); - EXPECT_EQ(conv_nodes_count, conv_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - -TEST(CpuQuantizePass, transpose) { - // a1->Quant->a2->Conv1->b1->Dequant->b2 - // b2->Quant->b3->Transpose->c1->Dequant->c2 - // c2->Quant->c3->Conv2->d1->Dequant->d2 - // d2->Quant->d3->Transpose->e1->Dequant->e2 - // e2->Dropout->f - int conv_count = 2; - int transpose_count = 2; - int quant_count = 4; - int dequant_count = 4; - // 4 Quant + 4 IN + 4 DeQuant + 4 OUT - int added_nodes_count = 4 + 4 + 4 + 4; - MainTestTranspose(BuildProgramDescTranspose(), conv_count, transpose_count, - quant_count, dequant_count, added_nodes_count, 2.0f * 127); + int added_nodes = 6; + 
std::unordered_map expected_operators = { + {"pool2d", 3}, {"concat", 1}, {"quantize", 2}, {"dequantize", 1}}; + MainTest(BuildProgramDescConcat(), variable_names_concat, expected_operators, + added_nodes); } static const std::initializer_list variable_names_fusion_gru = { @@ -422,7 +315,7 @@ static const std::initializer_list variable_names_fusion_gru = { // (x, wx, wh, b)->Fusion_gru->h ProgramDesc BuildProgramDescFusionGru() { ProgramDesc prog; - for (auto& v : variable_names_transpose) { + for (auto& v : variable_names_fusion_gru) { auto* var = prog.MutableBlock(0)->Var(v); if (v.find("wx") == 0 || v.find("wh") || v.find("b")) { var->SetPersistable(true); @@ -441,7 +334,7 @@ static const std::initializer_list variable_names_fusion_lstm = { // (x, wx, wh, b)->Fusion_lstm_1->h ProgramDesc BuildProgramDescFusionLSTM() { ProgramDesc prog; - for (auto& v : variable_names_transpose) { + for (auto& v : variable_names_fusion_lstm) { auto* var = prog.MutableBlock(0)->Var(v); if (v.find("wx") == 0 || v.find("wh") || v.find("b")) { var->SetPersistable(true); @@ -454,109 +347,180 @@ ProgramDesc BuildProgramDescFusionLSTM() { return prog; } -void MainTestFusionGru(const ProgramDesc& prog, int gru_count, int quant_count, - int dequant_count, int added_nodes_count, float scale, - float shift) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_fusion_gru, &original_nodes_num, - ¤t_nodes_num); +TEST(CpuQuantizePass, fusion_gru) { + // (x, wx, wh, b)->Fusion_gru->h - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int gru_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "fusion_gru") { - gru_nodes_count++; + // 1 Quant + 1 IN + 0 DeQuant + 0 OUT + int added_nodes = 1 + 1 + 0 + 0; + std::unordered_map expected_operators = { + {"fusion_gru", 1}, {"quantize", 1}, {"dequantize", 0}}; + 
MainTest(BuildProgramDescFusionGru(), variable_names_fusion_gru, + expected_operators, added_nodes, SCALE * S8_MAX, 128); +} - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale) - << "Scale_data for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift) - << "Shift_data for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(std::vector, - op->GetAttr("Scale_weights"))[0], - scale) - << "Scale_weights for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")), true) - << "force_fp32_output for node '" + op_name + "'."; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } +TEST(CpuQuantizePass, fusion_lstm) { + // (x, wx, wh, b)->Fusion_lstm->h + + // 1 Quant + 1 IN + 0 DeQuant + 0 OUT + int added_nodes = 1 + 1 + 0 + 0; + std::unordered_map expected_operators = { + {"fusion_lstm", 1}, {"quantize", 1}, {"dequantize", 0}}; + MainTest(BuildProgramDescFusionLSTM(), variable_names_fusion_lstm, + expected_operators, added_nodes, SCALE * S8_MAX, 128.); +} + +static const std::initializer_list variable_names_immutable_ops = { + "a", "w1", "b", "c", "d"}; + +// a->Dequantize->b +// b->Tested Op->c +// c->Dropout->d +void TestImmutableOp(const std::string tested_op) { + ProgramDesc prog; + for (auto& v : variable_names_immutable_ops) { + prog.MutableBlock(0)->Var(v); } - EXPECT_EQ(gru_nodes_count, gru_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); + SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); + SetOp(&prog, tested_op, tested_op, {"b"}, {"c"}, true, "int8"); + SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32"); + + // a->Dequantize->b + // 
b2->Quant->b3->Tested Op->c1->Dequant->c2 + // c2->Dropout->d + // 1 Quant + 1 IN + 1 DeQuant + 1 OUT + int added_nodes = 4; + std::unordered_map expected_operators = { + {tested_op, 1}, {"quantize", 1}, {"dequantize", 2}}; + MainTest(prog, variable_names_immutable_ops, expected_operators, added_nodes, + SCALE * S8_MAX); } -TEST(CpuQuantizePass, fusion_gru) { - // (x, wx, wh, b)->Fusion_gru->h - int gru_count = 1; - int quant_count = 1; - int dequant_count = 0; - // 1 Quant + 1 IN + 0 DeQuant + 0 OUT - int added_nodes_count = 1 + 1 + 0 + 0; - MainTestFusionGru(BuildProgramDescFusionGru(), gru_count, quant_count, - dequant_count, added_nodes_count, 2. * 127, 128.); +// a->Dropout1->b +// b->Tested Op->c +// c->Dropout2->d +void TestImmutableOpBetweenNonQuantizedOp(const std::string tested_op) { + ProgramDesc prog; + for (auto& v : variable_names_immutable_ops) { + prog.MutableBlock(0)->Var(v); + } + + SetOp(&prog, "dropout", "Dropout1", {"a"}, {"b"}, true, "float32"); + SetOp(&prog, tested_op, tested_op, {"b"}, {"c"}, true, "int8"); + SetOp(&prog, "dropout", "Dropout2", {"c"}, {"d"}, true, "float32"); + + // 0 Quant + 0 IN + 0 DeQuant + 0 OUT + int added_nodes = 0; + std::unordered_map expected_operators = { + {tested_op, 1}, {"dropout", 2}, {"quantize", 0}, {"dequantize", 0}}; + MainTest(prog, variable_names_immutable_ops, expected_operators, added_nodes, + SCALE * S8_MAX); } -void MainTestFusionLSTM(const ProgramDesc& prog, int expect_lstm_count, - int quant_count, int dequant_count, - int added_nodes_count, float scale, float shift) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_fusion_lstm, &original_nodes_num, - ¤t_nodes_num); +TEST(CpuQuantizePass, reshape2) { TestImmutableOp("reshape2"); } - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int lstm_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if 
(op->Type() == "fusion_lstm") { - lstm_nodes_count++; +TEST(CpuQuantizePass, reshape2BetweenNonQuantizedOp) { + TestImmutableOpBetweenNonQuantizedOp("reshape2"); +} - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale) - << "Scale_data for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift) - << "Shift_data for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(std::vector, - op->GetAttr("Scale_weights"))[0], - scale) - << "Scale_weights for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")), true) - << "force_fp32_output for node '" + op_name + "'."; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } +TEST(CpuQuantizePass, transpose2) { TestImmutableOp("transpose2"); } + +TEST(CpuQuantizePass, transpose2BetweenNonQuantizedOp) { + TestImmutableOpBetweenNonQuantizedOp("transpose2"); +} + +TEST(CpuQuantizePass, slice) { TestImmutableOp("slice"); } + +TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) { + TestImmutableOpBetweenNonQuantizedOp("slice"); +} + +static const std::initializer_list variable_names_matmul = { + "a", "b", "c", "d", "e", "f"}; + +ProgramDesc BuildProgramDescMatmul() { + ProgramDesc prog; + for (auto& v : variable_names_matmul) { + prog.MutableBlock(0)->Var(v); } - EXPECT_EQ(lstm_nodes_count, expect_lstm_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); + SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); + SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); + SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); + + return prog; } 
-TEST(CpuQuantizePass, fusion_lstm) { - // (x, wx, wh, b)->Fusion_lstm->h - int expect_lstm_count = 1; - int expect_quant_count = 1; - int dequant_count = 0; - // 1 Quant + 1 IN + 0 DeQuant + 0 OUT - int added_nodes_count = 1 + 1 + 0 + 0; - MainTestFusionLSTM(BuildProgramDescFusionLSTM(), expect_lstm_count, - expect_quant_count, dequant_count, added_nodes_count, - 2. * 127, 128.); +ProgramDesc BuildProgramDescMatmulNotQuantized() { + ProgramDesc prog; + for (auto& v : variable_names_matmul) { + prog.MutableBlock(0)->Var(v); + } + SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false); + SetOp(&prog, "dequantize", "Dequantize", {"c"}, {"d"}, true); + SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); + + return prog; +} + +TEST(CpuQuantizePass, matmul) { + // 2 Quant + 2 IN + 1 DeQuant + 1 OUT + int added_nodes = 6; + std::unordered_map expected_operators = { + {"matmul", 1}, {"quantize", 2}, {"dequantize", 3}}; + MainTest(BuildProgramDescMatmul(), variable_names_matmul, expected_operators, + added_nodes, SCALE * S8_MAX); +} + +TEST(CpuQuantizePass, matmul_not_quantized) { + // nothing change + int added_nodes = 0; + std::unordered_map expected_operators = { + {"matmul", 1}, {"quantize", 0}, {"dequantize", 1}}; + MainTest(BuildProgramDescMatmulNotQuantized(), variable_names_matmul, + expected_operators, added_nodes, 1.0f); +} + +static const std::initializer_list variable_names_elementwise_add = + {"a", "b", "c", "d", "e", "f"}; + +ProgramDesc BuildProgramDescElementwiseAdd() { + ProgramDesc prog; + for (auto& v : variable_names_elementwise_add) { + prog.MutableBlock(0)->Var(v); + } + SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); + SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); + SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true, + "int8"); + SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); + + return prog; 
+} + +TEST(CpuQuantizePass, elementwise_add) { + // 2 Quant + 2 IN + 1 DeQuant + 1 OUT + int added_nodes = 6; + std::unordered_map expected_operators = { + {"elementwise_add", 1}, {"quantize", 2}, {"dequantize", 3}}; + MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, + expected_operators, added_nodes, SCALE * S8_MAX); +} + +TEST(CpuQuantizePass, elementwise_add_output_scale_missing) { + int added_nodes = 0; + std::unordered_map expected_operators = { + {"elementwise_add", 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, + expected_operators, added_nodes, 1.f, 1.f, "e"); +} + +TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { + int added_nodes = 0; + std::unordered_map expected_operators = { + {"elementwise_add", 1}, {"quantize", 0}, {"dequantize", 2}}; + MainTest(BuildProgramDescElementwiseAdd(), variable_names_elementwise_add, + expected_operators, added_nodes, 1.f, 1.f, "", "b"); } const std::vector churn_out_vars(ProgramDesc* prog, @@ -681,395 +645,6 @@ TEST(CpuQuantizePass, multi_gru_3) { MainTestMultiGru(layers); } -static const std::initializer_list variable_names_reshape = { - "a", "w1", "b", "c", "d", "e", "f"}; - -// a->Dequantize->b -// b->Reshape->c -// c->Dropout->d -ProgramDesc BuildProgramDescReshape() { - ProgramDesc prog; - for (auto& v : variable_names_reshape) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); - SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32"); - - return prog; -} - -// a->Transpose->b -// b->Reshape->c -// c->Dropout->d -ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() { - ProgramDesc prog; - for (auto& v : variable_names_reshape) { - prog.MutableBlock(0)->Var(v); - } - - SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32"); - SetOp(&prog, "reshape2", 
"Reshape2", {"b"}, {"c"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32"); - - return prog; -} - -void MainTestReshape(const ProgramDesc& prog, int transpose_count, - int reshape_count, int quant_count, int dequant_count, - int added_nodes_count, float scale) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_reshape, &original_nodes_num, - ¤t_nodes_num); - - float quant_scale = 1.0f; - float dequant_scale = 1.0f; - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int transpose_nodes_count = 0; - int reshape_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "transpose2") { - transpose_nodes_count++; - } else if (op->Type() == "reshape2") { - reshape_nodes_count++; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - quant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale")); - EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'."; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - auto op_name = op->GetAttrIfExists("name"); - VLOG(3) << op_name << "\n"; - if (op_name != "Dequantize1") { - dequant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale")); - EXPECT_EQ(dequant_scale, scale) - << "Scale for node '" + op->Type() + "'."; - } - } - } - } - EXPECT_EQ(transpose_nodes_count, transpose_count); - EXPECT_EQ(reshape_nodes_count, reshape_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - -TEST(CpuQuantizePass, reshape) { - // a->Dequantize->b - // b2->Quant->b3->Reshape2->c1->Dequant->c2 - // c2->Dropout->d - int reshape_count = 1; - int transpose_count = 0; - int quant_count = 1; - int dequant_count = 2; - // 1 Quant + 1 IN + 1 DeQuant + 1 OUT - int added_nodes_count = 4; - 
MainTestReshape(BuildProgramDescReshape(), transpose_count, reshape_count, - quant_count, dequant_count, added_nodes_count, 2.0f * 127); -} - -TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) { - // a->Transpos2->b - // b->Reshape2->c - // c->Dropout->d - int reshape_count = 1; - int transpose_count = 1; - int quant_count = 0; - int dequant_count = 0; - // 0 Quant + 0 IN + 0 DeQuant + 0 OUT - int added_nodes_count = 0; - MainTestReshape(BuildProgramDescReshapeBetweenNonQuantizedOp(), - transpose_count, reshape_count, quant_count, dequant_count, - added_nodes_count, 2.0f * 127); -} - -static const std::initializer_list variable_names_slice = { - "a", "b", "c", "d"}; - -// a->Dequantize->b -// b->Slice->c -// c->Dropout->d -ProgramDesc BuildProgramDescSlice() { - ProgramDesc prog; - for (auto& v : variable_names_slice) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); - SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32"); - - return prog; -} - -// a->Transpose->b -// b->slice->c -// c->Dropout->d -ProgramDesc BuildProgramDescSliceBetweenNonQuantizedOp() { - ProgramDesc prog; - for (auto& v : variable_names_slice) { - prog.MutableBlock(0)->Var(v); - } - - SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32"); - SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32"); - - return prog; -} - -void MainTestSlice(const ProgramDesc& prog, int transpose_count, - int slice_count, int quant_count, int dequant_count, - int added_nodes_count, float scale) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_slice, &original_nodes_num, - ¤t_nodes_num); - - float quant_scale = 1.0f; - float dequant_scale = 1.0f; - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - 
int transpose_nodes_count = 0; - int slice_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "transpose2") { - transpose_nodes_count++; - } else if (op->Type() == "slice") { - slice_nodes_count++; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - quant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale")); - EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'."; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - auto op_name = op->GetAttrIfExists("name"); - VLOG(3) << op_name << "\n"; - if (op_name != "Dequantize1") { - dequant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale")); - EXPECT_EQ(dequant_scale, scale) - << "Scale for node '" + op->Type() + "'."; - } - } - } - } - EXPECT_EQ(transpose_nodes_count, transpose_count); - EXPECT_EQ(slice_nodes_count, slice_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - -TEST(CpuQuantizePass, slice) { - // a->Dequantize->b - // b2->Quant->b3->slice->c1->Dequant->c2 - // c2->Dropout->d - int slice_count = 1; - int transpose_count = 0; - int quant_count = 1; - int dequant_count = 2; - // 1 Quant + 1 IN + 1 DeQuant + 1 OUT - int added_nodes_count = 4; - MainTestSlice(BuildProgramDescSlice(), transpose_count, slice_count, - quant_count, dequant_count, added_nodes_count, 2.0f * 127); -} - -TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) { - // a->Transpos2->b - // b->slice->c - // c->Dropout->d - int slice_count = 1; - int transpose_count = 1; - int quant_count = 0; - int dequant_count = 0; - // 0 Quant + 0 IN + 0 DeQuant + 0 OUT - int added_nodes_count = 0; - MainTestSlice(BuildProgramDescSliceBetweenNonQuantizedOp(), transpose_count, - slice_count, quant_count, dequant_count, added_nodes_count, - 2.0f * 127); -} - -static const std::initializer_list 
variable_names_matmul = { - "a", "b", "c", "d", "e", "f"}; - -ProgramDesc BuildProgramDescMatmul() { - ProgramDesc prog; - for (auto& v : variable_names_matmul) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); - SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); - SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); - - return prog; -} - -ProgramDesc BuildProgramDescMatmulNotQuantized() { - ProgramDesc prog; - for (auto& v : variable_names_matmul) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false); - SetOp(&prog, "dequantize", "Dequantize", {"c"}, {"d"}, true); - SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8"); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); - - return prog; -} - -void MainTestMatmul(const ProgramDesc& prog, int matmul_count, int quant_count, - int dequant_count, int added_nodes_count, float scale) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_matmul, &original_nodes_num, - ¤t_nodes_num); - - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int matmul_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "matmul") { - matmul_nodes_count++; - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")), scale) - << "Scale_x for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")), scale) - << "Scale_y for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale) - << "Scale_out for node '" + op_name + "'."; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - 
dequantize_nodes_count++; - } - } - } - EXPECT_EQ(matmul_nodes_count, matmul_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - -TEST(CpuQuantizePass, matmul) { - int matmul_count = 1; - int quant_count = 2; - int dequant_count = 3; - // 2 Quant + 2 IN + 1 DeQuant + 1 OUT - int added_nodes_count = 6; - MainTestMatmul(BuildProgramDescMatmul(), matmul_count, quant_count, - dequant_count, added_nodes_count, 2.0f * 127); -} - -TEST(CpuQuantizePass, matmul_not_quantized) { - int matmul_count = 1; - int quant_count = 0; - int dequant_count = 1; - // nothing change - int added_nodes_count = 0; - MainTestMatmul(BuildProgramDescMatmulNotQuantized(), matmul_count, - quant_count, dequant_count, added_nodes_count, 1.0f); -} - -static const std::initializer_list variable_names_elementwise_add = - {"a", "b", "c", "d", "e", "f"}; - -ProgramDesc BuildProgramDescElementwiseAdd() { - ProgramDesc prog; - for (auto& v : variable_names_elementwise_add) { - prog.MutableBlock(0)->Var(v); - } - SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true); - SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true); - SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true, - "int8"); - SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32"); - - return prog; -} - -void MainTestElementwiseAdd(const ProgramDesc& prog, int elementwise_add_count, - int quant_count, int dequant_count, - int added_nodes_count, float scale, - bool output_scale_missing = false, - bool unsigned_and_signed_input = false) { - std::unique_ptr graph(new ir::Graph(prog)); - int original_nodes_num, current_nodes_num; - PreparePass(&graph, prog, variable_names_elementwise_add, &original_nodes_num, - ¤t_nodes_num, output_scale_missing ? "e" : "", - unsigned_and_signed_input ? 
"b" : ""); - - int quantize_nodes_count = 0; - int dequantize_nodes_count = 0; - int elementwise_add_nodes_count = 0; - for (auto* node : graph->Nodes()) { - if (node->IsOp()) { - auto* op = node->Op(); - if (op->Type() == "elementwise_add") { - elementwise_add_nodes_count++; - if (unsigned_and_signed_input) scale = 1.0f; - auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")), scale) - << "Scale_x for node '" + op_name + "'."; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")), scale) - << "Scale_y for node '" + op_name + "'."; - if (output_scale_missing) scale = 1.0; - EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale) - << "Scale_out for node '" + op_name + "'."; - } else if (op->Type() == "quantize") { - quantize_nodes_count++; - } else if (op->Type() == "dequantize") { - dequantize_nodes_count++; - } - } - } - EXPECT_EQ(elementwise_add_nodes_count, elementwise_add_count); - EXPECT_EQ(quantize_nodes_count, quant_count); - EXPECT_EQ(dequantize_nodes_count, dequant_count); - EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); -} - -TEST(CpuQuantizePass, elementwise_add) { - int elementwise_add_count = 1; - int quant_count = 2; - int dequant_count = 3; - // 2 Quant + 2 IN + 1 DeQuant + 1 OUT - int added_nodes_count = 6; - MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), - elementwise_add_count, quant_count, dequant_count, - added_nodes_count, 2.0f * 127); -} - -TEST(CpuQuantizePass, elementwise_add_output_scale_missing) { - int elementwise_add_count = 1; - int quant_count = 0; - int dequant_count = 2; - int added_nodes_count = 0; - MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), - elementwise_add_count, quant_count, dequant_count, - added_nodes_count, 1.f, true); -} - -TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { - int elementwise_add_count = 1; - int quant_count = 0; - int dequant_count = 2; - int 
added_nodes_count = 0; - MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), - elementwise_add_count, quant_count, dequant_count, - added_nodes_count, 2.0f * 127, false, true); -} - -} // namespace - } // namespace ir } // namespace framework } // namespace paddle