From 36abeff44f403d898f1d872cac8b5dbd383932e6 Mon Sep 17 00:00:00 2001
From: Sylwester Fraczek
Date: Tue, 30 Jun 2020 03:52:23 +0200
Subject: [PATCH] adding elementwiseadd quantization (#25178)

---
 .../framework/ir/mkldnn/cpu_quantize_pass.cc  | 116 ++++++++++++++++--
 .../framework/ir/mkldnn/cpu_quantize_pass.h   |   2 +
 .../ir/mkldnn/cpu_quantize_pass_tester.cc     | 114 +++++++++++++++--
 .../inference/api/mkldnn_quantizer_config.cc  |   4 +
 4 files changed, 222 insertions(+), 14 deletions(-)

diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index f47ef2162da..246cbf36e09 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -37,10 +37,11 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) {
                   b->inputs.end());
 }
 
-void LogCannotQuantizeOp(Node* op) {
+void LogCannotQuantizeOp(Node* op, const char* details = nullptr) {
   std::stringstream msg_ss;
   msg_ss << "Cannot quantize operator " << op->Name()
          << " (type: " << op->Op()->Type() << ", id: " << op->id() << ").";
+  if (details) msg_ss << " " << details;
   PrettyLogDetail(msg_ss.str().c_str());
 }
 
@@ -51,6 +52,13 @@ void LogScaleIsMissingForVar(Node* var) {
   PrettyLogDetail(msg_ss.str().c_str());
 }
 
+void LogQuantizationDisabled(Node* op) {
+  std::stringstream msg_ss;
+  VLOG(4) << "Quantization skipped for operator " << op->Name()
+          << " (type: " << op->Op()->Type() << ", id: " << op->id()
+          << "). Attribute use_quantizer = false.";
+}
+
 }  // namespace
 
 enum { U8_MAX = 255, S8_MAX = 127 };
@@ -239,7 +247,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
     auto* conv_op_desc = conv_op->Op();
 
     // skip if should not be quantized
-    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(conv_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
@@ -333,9 +344,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
     auto* fc_op_desc = fc->Op();
 
     // skip if should not be quantized
-    if (fc_op_desc->GetAttrIfExists<bool>("use_quantizer") != true ||
-        fc_op_desc->GetAttrIfExists<bool>("use_mkldnn") != true)
+    if (!fc_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(fc);
+      return;
+    }
+    if (!fc_op_desc->GetAttrIfExists<bool>("use_mkldnn")) {
       return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(weights, weights, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern);
@@ -396,7 +411,10 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
     auto* pool_op_desc = pool_op->Op();
 
     // skip if should not be quantized
-    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(pool_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
@@ -438,7 +456,10 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
     auto* concat_op_desc = concat_op->Op();
 
     // skip if should not be quantized
-    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(concat_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
 
@@ -481,7 +502,10 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
     auto* prior_box_op_desc = prior_box_op->Op();
 
     // skip if should not be quantized
-    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(prior_box_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
                               prior_box_pattern);
@@ -522,6 +546,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
 
     // skip if should not be quantized
     if (!transpose_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(transpose_op);
       return;
     }
     GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
@@ -576,6 +601,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
 
     // skip if should not be quantized
     if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(reshape_op);
       return;
     }
     GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern);
@@ -628,6 +654,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
 
     // skip if should not be quantized
     if (!matmul_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(matmul_op);
       return;
     }
     GET_IR_NODE_FROM_SUBGRAPH(prev_op_x, prev_op_x, matmul_pattern);
@@ -676,6 +703,80 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
   PrettyLogDetail("--- quantized %d matmul ops", quantize_matmul_count);
 }
 
+void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+  patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
+
+  elementwise_add_pattern(
+      pattern->NewNode(elementwise_add_pattern.elementwise_add_x_repr()),
+      pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr()));
+
+  int quantize_elementwise_add_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize elementwise_add op";
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
+                              elementwise_add_pattern);
+    auto* elementwise_add_op_desc = elementwise_add_op->Op();
+
+    // skip if should not be quantized
+    if (!elementwise_add_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(elementwise_add_op);
+      return;
+    }
+
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
+                              elementwise_add_pattern);
+
+    if (!AreScalesPresentForNodes(elementwise_add_op,
+                                  {elementwise_add_x, elementwise_add_y})) {
+      LogCannotQuantizeOp(elementwise_add_op);
+      return;
+    }
+
+    bool is_x_unsigned{false}, is_y_unsigned{false};
+    auto input_x_scale =
+        GetScaleValueForNode(elementwise_add_x, &is_x_unsigned);
+    auto input_y_scale =
+        GetScaleValueForNode(elementwise_add_y, &is_y_unsigned);
+
+    // TODO(sfraczek): add support for different signness
+    if (is_x_unsigned != is_y_unsigned) {
+      LogCannotQuantizeOp(elementwise_add_op,
+                          "ElementwiseAdd inputs must be of the same type.");
+      return;
+    }
+
+    QuantizeInput(g, elementwise_add_op, elementwise_add_x, "X", input_x_scale,
+                  is_x_unsigned, "Scale_x");
+    QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", input_y_scale,
+                  is_y_unsigned, "Scale_y");
+
+    // if quantization scale is missing for output tensor, return fp32 data
+    if (AreScalesPresentForNodes(elementwise_add_op, {elementwise_add_out})) {
+      bool is_output_unsigned{false};
+      auto output_scale =
+          GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);
+      DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
+                       output_scale, is_output_unsigned, "Scale_out");
+    } else {
+      elementwise_add_op->Op()->SetAttr("force_fp32_output", true);
+    }
+
+    ++quantize_elementwise_add_count;
+  };
+  gpd(graph, handler);
+  AddStatis(quantize_elementwise_add_count);
+
+  PrettyLogDetail("--- quantized %d elementwise_add ops",
+                  quantize_elementwise_add_count);
+}
+
 void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Quantizing the graph.";
   PADDLE_ENFORCE(graph);
@@ -692,6 +793,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeFc(graph);
   QuantizeReshape(graph);
   QuantizeMatmul(graph);
+  QuantizeElementwiseAdd(graph);
 }
 
 }  // namespace ir
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
index cd5c673061b..21219e7dca8 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
@@ -60,6 +60,8 @@ class CPUQuantizePass : public FusePassBase {
 
   void QuantizeMatmul(Graph* graph) const;
 
+  void QuantizeElementwiseAdd(Graph* graph) const;
+
   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                      double scale_to_one, bool is_unsigned,
                      std::string scale_attr_name = "") const;
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
index 67a9957cb06..395b419cac1 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
 #include <gtest/gtest.h>
+
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
 #include "paddle/fluid/platform/place.h"
@@ -82,6 +83,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetAttr("Scale_x", 1.0f);
     op->SetAttr("Scale_y", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
+  } else if (type == "elementwise_add") {
+    op->SetInput("X", {inputs[0]});
+    if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("Scale_x", 1.0f);
+    op->SetAttr("Scale_y", 1.0f);
+    op->SetAttr("Scale_out", 1.0f);
   }
 }
 
@@ -95,7 +104,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
 void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
                  const std::initializer_list<std::string> variable_names,
                  int* original_nodes_num, int* current_nodes_num,
-                 std::string var_without_scale = "") {
+                 std::string var_without_scale = "",
+                 std::string var_signed = "") {
   auto place = paddle::platform::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
@@ -108,8 +118,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
     tensor.Resize({1});
     auto* ptr = tensor.mutable_data<double>(place);
     ptr[0] = 2.0;
-
-    (*scales)[v] = std::make_pair(false, std::move(tensor));
+    (*scales)[v] = std::make_pair(v == var_signed, std::move(tensor));
   }
 
   (*graph)->SetNotOwned(kParamScopeAttr, &scope);
@@ -387,7 +396,7 @@ static const std::initializer_list<std::string> variable_names_reshape = {
 // c->Dropout->d
 ProgramDesc BuildProgramDescReshape() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_reshape) {
     prog.MutableBlock(0)->Var(v);
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
@@ -402,7 +411,7 @@
 // c->Dropout->d
 ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_reshape) {
     prog.MutableBlock(0)->Var(v);
   }
 
@@ -491,7 +500,7 @@ static const std::initializer_list<std::string> variable_names_matmul = {
 
 ProgramDesc BuildProgramDescMatmul() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_matmul) {
     prog.MutableBlock(0)->Var(v);
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
@@ -504,7 +513,7 @@ ProgramDesc BuildProgramDescMatmul() {
 
 ProgramDesc BuildProgramDescMatmulNotQuantized() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_matmul) {
     prog.MutableBlock(0)->Var(v);
   }
   SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false);
@@ -569,6 +578,97 @@ TEST(CpuQuantizePass, matmul_not_quantized) {
   MainTestMatmul(BuildProgramDescMatmulNotQuantized(), matmul_count,
                  quant_count, dequant_count, added_nodes_count, 1.0f);
 }
+
+static const std::initializer_list<std::string> variable_names_elementwise_add =
+    {"a", "b", "c", "d", "e", "f"};
+
+ProgramDesc BuildProgramDescElementwiseAdd() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_elementwise_add) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
+  SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
+  SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
+        true);
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+
+  return prog;
+}
+
+void MainTestElementwiseAdd(const ProgramDesc& prog, int elementwise_add_count,
+                            int quant_count, int dequant_count,
+                            int added_nodes_count, float scale,
+                            bool output_scale_missing = false,
+                            bool unsigned_and_signed_input = false) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names_elementwise_add, &original_nodes_num,
+              &current_nodes_num, output_scale_missing ? "e" : "",
+              unsigned_and_signed_input ? "b" : "");
"b" : ""); + + int quantize_nodes_count = 0; + int dequantize_nodes_count = 0; + int elementwise_add_nodes_count = 0; + for (auto* node : graph->Nodes()) { + if (node->IsOp()) { + auto* op = node->Op(); + if (op->Type() == "elementwise_add") { + elementwise_add_nodes_count++; + if (unsigned_and_signed_input) scale = 1.0f; + auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name")); + EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")), scale) + << "Scale_x for node '" + op_name + "'."; + EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")), scale) + << "Scale_y for node '" + op_name + "'."; + if (output_scale_missing) scale = 1.0; + EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale) + << "Scale_out for node '" + op_name + "'."; + } else if (op->Type() == "quantize") { + quantize_nodes_count++; + } else if (op->Type() == "dequantize") { + dequantize_nodes_count++; + } + } + } + EXPECT_EQ(elementwise_add_nodes_count, elementwise_add_count); + EXPECT_EQ(quantize_nodes_count, quant_count); + EXPECT_EQ(dequantize_nodes_count, dequant_count); + EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num); +} + +TEST(CpuQuantizePass, elementwise_add) { + int elementwise_add_count = 1; + int quant_count = 2; + int dequant_count = 3; + // 2 Quant + 2 IN + 1 DeQuant + 1 OUT + int added_nodes_count = 6; + MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), + elementwise_add_count, quant_count, dequant_count, + added_nodes_count, 2.0f * 127); +} + +TEST(CpuQuantizePass, elementwise_add_output_scale_missing) { + int elementwise_add_count = 1; + int quant_count = 2; + int dequant_count = 2; + // 2 Quant + 2 IN + int added_nodes_count = 4; + MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), + elementwise_add_count, quant_count, dequant_count, + added_nodes_count, 2.0f * 127, true); +} + +TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) { + int elementwise_add_count = 1; + int quant_count = 0; + int dequant_count = 2; + int added_nodes_count = 0; + MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(), + elementwise_add_count, quant_count, dequant_count, + added_nodes_count, 2.0f * 127, false, true); +} + } // namespace } // namespace ir diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc index 9ff5ef133e1..76cf1661f30 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc @@ -49,6 +49,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() { rules_["matmul"]["Y"] = ScaleAlgo::KL; rules_["matmul"]["Out"] = ScaleAlgo::KL; + rules_["elementwise_add"]["X"] = ScaleAlgo::KL; + rules_["elementwise_add"]["Y"] = ScaleAlgo::KL; + rules_["elementwise_add"]["Out"] = ScaleAlgo::KL; + // Reshape2 does not perform calculation on the data and shapes are not // changed. Scale is calculated on input data and assign to Quantize and // Dequantize scale. -- GitLab