Unverified · Commit 36abeff4, authored by Sylwester Fraczek, committed by GitHub

adding elementwiseadd quantization (#25178)

Parent 87a4a7ec
@@ -37,10 +37,11 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) {
                   b->inputs.end());
 }
 
-void LogCannotQuantizeOp(Node* op) {
+void LogCannotQuantizeOp(Node* op, const char* details = nullptr) {
   std::stringstream msg_ss;
   msg_ss << "Cannot quantize operator " << op->Name()
          << " (type: " << op->Op()->Type() << ", id: " << op->id() << ").";
+  if (details) msg_ss << " " << details;
   PrettyLogDetail(msg_ss.str().c_str());
 }
@@ -51,6 +52,12 @@ void LogScaleIsMissingForVar(Node* var) {
   PrettyLogDetail(msg_ss.str().c_str());
 }
 
+void LogQuantizationDisabled(Node* op) {
+  VLOG(4) << "Quantization skipped for operator " << op->Name()
+          << " (type: " << op->Op()->Type() << ", id: " << op->id()
+          << "). Attribute use_quantizer = false.";
+}
+
 }  // namespace
 
 enum { U8_MAX = 255, S8_MAX = 127 };
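As background for the elementwise_add changes below: these two limits drive the symmetric quantization the pass performs, and the tests later in this diff expect scale attributes of 2.0f * 127 for signed int8 data. A toy round-trip (illustrative only, not Paddle code) showing why two inputs quantized with one shared scale can be added in the integer domain:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    enum { U8_MAX = 255, S8_MAX = 127 };

    // Symmetric int8 quantization: q = round(x * scale), clamped to [-127, 127].
    int QuantizeS8(float x, float scale) {
      float q = std::round(x * scale);
      return static_cast<int>(std::max(-127.0f, std::min(127.0f, q)));
    }

    float DequantizeS8(int q, float scale) { return q / scale; }

    int main() {
      const float scale = 2.0f * S8_MAX;  // the value the tests below expect
      // Quantize both addends with the same scale, add as integers, then
      // dequantize once; this is the structure the pass builds around
      // elementwise_add.
      int sum = QuantizeS8(0.25f, scale) + QuantizeS8(-0.125f, scale);
      std::printf("%f\n", DequantizeS8(sum, scale));  // ~0.125
    }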
@@ -239,7 +247,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
     auto* conv_op_desc = conv_op->Op();
 
     // skip if should not be quantized
-    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(conv_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
@@ -333,9 +344,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
     auto* fc_op_desc = fc->Op();
 
     // skip if should not be quantized
-    if (fc_op_desc->GetAttrIfExists<bool>("use_quantizer") != true ||
-        fc_op_desc->GetAttrIfExists<bool>("use_mkldnn") != true)
-      return;
+    if (!fc_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(fc);
+      return;
+    }
+    if (!fc_op_desc->GetAttrIfExists<bool>("use_mkldnn")) {
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(weights, weights, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern);
@@ -396,7 +411,10 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
     auto* pool_op_desc = pool_op->Op();
 
     // skip if should not be quantized
-    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(pool_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
@@ -438,7 +456,10 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
     auto* concat_op_desc = concat_op->Op();
 
     // skip if should not be quantized
-    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(concat_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
@@ -481,7 +502,10 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
     auto* prior_box_op_desc = prior_box_op->Op();
 
     // skip if should not be quantized
-    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) return;
+    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(prior_box_op);
+      return;
+    }
 
     GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
                               prior_box_pattern);
@@ -522,6 +546,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
     // skip if should not be quantized
     if (!transpose_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(transpose_op);
       return;
     }
 
     GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
@@ -576,6 +601,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
     // skip if should not be quantized
     if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(reshape_op);
       return;
     }
 
     GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, reshape_pattern);
@@ -628,6 +654,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
     // skip if should not be quantized
     if (!matmul_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(matmul_op);
      return;
     }
 
     GET_IR_NODE_FROM_SUBGRAPH(prev_op_x, prev_op_x, matmul_pattern);
@@ -676,6 +703,80 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
   PrettyLogDetail("--- quantized %d matmul ops", quantize_matmul_count);
 }
 
+void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+  patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
+
+  elementwise_add_pattern(
+      pattern->NewNode(elementwise_add_pattern.elementwise_add_x_repr()),
+      pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr()));
+
+  int quantize_elementwise_add_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize elementwise_add op";
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
+                              elementwise_add_pattern);
+    auto* elementwise_add_op_desc = elementwise_add_op->Op();
+
+    // skip if should not be quantized
+    if (!elementwise_add_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+      LogQuantizationDisabled(elementwise_add_op);
+      return;
+    }
+
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y,
+                              elementwise_add_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
+                              elementwise_add_pattern);
+
+    if (!AreScalesPresentForNodes(elementwise_add_op,
+                                  {elementwise_add_x, elementwise_add_y})) {
+      LogCannotQuantizeOp(elementwise_add_op);
+      return;
+    }
+
+    bool is_x_unsigned{false}, is_y_unsigned{false};
+    auto input_x_scale =
+        GetScaleValueForNode(elementwise_add_x, &is_x_unsigned);
+    auto input_y_scale =
+        GetScaleValueForNode(elementwise_add_y, &is_y_unsigned);
+
+    // TODO(sfraczek): add support for different signedness
+    if (is_x_unsigned != is_y_unsigned) {
+      LogCannotQuantizeOp(elementwise_add_op,
+                          "ElementwiseAdd inputs must be of the same type.");
+      return;
+    }
+
+    QuantizeInput(g, elementwise_add_op, elementwise_add_x, "X", input_x_scale,
+                  is_x_unsigned, "Scale_x");
+    QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", input_y_scale,
+                  is_y_unsigned, "Scale_y");
+
+    // if quantization scale is missing for the output tensor, return fp32 data
+    if (AreScalesPresentForNodes(elementwise_add_op, {elementwise_add_out})) {
+      bool is_output_unsigned{false};
+      auto output_scale =
+          GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);
+      DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
+                       output_scale, is_output_unsigned, "Scale_out");
+    } else {
+      elementwise_add_op->Op()->SetAttr("force_fp32_output", true);
+    }
+
+    ++quantize_elementwise_add_count;
+  };
+
+  gpd(graph, handler);
+  AddStatis(quantize_elementwise_add_count);
+
+  PrettyLogDetail("--- quantized %d elementwise_add ops",
+                  quantize_elementwise_add_count);
+}
+
 void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Quantizing the graph.";
   PADDLE_ENFORCE(graph);
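Two behaviors of the new handler are worth flagging: mixed u8/s8 inputs are rejected outright (the TODO above), and a missing output scale does not block quantization; the op instead keeps int8 inputs and is asked for fp32 output. A small sketch of the resulting attribute state in that fallback case, using the OpDesc API that appears elsewhere in this diff (the values are illustrative, not taken from the commit):

    #include "paddle/fluid/framework/op_desc.h"

    // Illustrative attribute state after the fallback branch above runs:
    // inputs stay quantized, but no dequantize op is appended to Out.
    void ExampleFallbackAttrs() {
      paddle::framework::OpDesc op;
      op.SetType("elementwise_add");
      op.SetAttr("Scale_x", 254.0f);          // set by QuantizeInput
      op.SetAttr("Scale_y", 254.0f);          // set by QuantizeInput
      op.SetAttr("force_fp32_output", true);  // used instead of DequantizeOutput
    }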
@@ -692,6 +793,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeFc(graph);
   QuantizeReshape(graph);
   QuantizeMatmul(graph);
+  QuantizeElementwiseAdd(graph);
 }
 
 }  // namespace ir
...
@@ -60,6 +60,8 @@ class CPUQuantizePass : public FusePassBase {
   void QuantizeMatmul(Graph* graph) const;
 
+  void QuantizeElementwiseAdd(Graph* graph) const;
+
   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                      double scale_to_one, bool is_unsigned,
                      std::string scale_attr_name = "") const;
...
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
 #include <gtest/gtest.h>
+#include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
 #include "paddle/fluid/platform/place.h"
@@ -82,6 +83,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetAttr("Scale_x", 1.0f);
     op->SetAttr("Scale_y", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
+  } else if (type == "elementwise_add") {
+    op->SetInput("X", {inputs[0]});
+    if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("Scale_x", 1.0f);
+    op->SetAttr("Scale_y", 1.0f);
+    op->SetAttr("Scale_out", 1.0f);
   }
 }
@@ -95,7 +104,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
 void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
                  const std::initializer_list<std::string> variable_names,
                  int* original_nodes_num, int* current_nodes_num,
-                 std::string var_without_scale = "") {
+                 std::string var_without_scale = "",
+                 std::string var_signed = "") {
   auto place = paddle::platform::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
@@ -108,8 +118,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
     tensor.Resize({1});
     auto* ptr = tensor.mutable_data<double>(place);
     ptr[0] = 2.0;
-
-    (*scales)[v] = std::make_pair(false, std::move(tensor));
+    (*scales)[v] = std::make_pair(v == var_signed, std::move(tensor));
   }
 
   (*graph)->SetNotOwned(kParamScopeAttr, &scope);
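Note on the scales map: the first member of each pair is the signedness flag that GetScaleValueForNode later reads back, so passing a variable name through var_signed flips that flag for exactly one tensor, giving the two elementwise_add inputs different signedness. The unsigned_and_signed_input test below uses this to trigger the new bailout path.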
@@ -387,7 +396,7 @@ static const std::initializer_list<std::string> variable_names_reshape = {
 //  c->Dropout->d
 ProgramDesc BuildProgramDescReshape() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_reshape) {
     prog.MutableBlock(0)->Var(v);
   }
 
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
@@ -402,7 +411,7 @@ ProgramDesc BuildProgramDescReshape() {
 //  c->Dropout->d
 ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_reshape) {
     prog.MutableBlock(0)->Var(v);
   }
@@ -491,7 +500,7 @@ static const std::initializer_list<std::string> variable_names_matmul = {
 ProgramDesc BuildProgramDescMatmul() {
   ProgramDesc prog;
-  for (auto& v : variable_names_transpose) {
+  for (auto& v : variable_names_matmul) {
     prog.MutableBlock(0)->Var(v);
   }
 
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
ProgramDesc BuildProgramDescMatmulNotQuantized() { ProgramDesc BuildProgramDescMatmulNotQuantized() {
ProgramDesc prog; ProgramDesc prog;
for (auto& v : variable_names_transpose) { for (auto& v : variable_names_matmul) {
prog.MutableBlock(0)->Var(v); prog.MutableBlock(0)->Var(v);
} }
SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false); SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false);
@@ -569,6 +578,97 @@ TEST(CpuQuantizePass, matmul_not_quantized) {
   MainTestMatmul(BuildProgramDescMatmulNotQuantized(), matmul_count,
                  quant_count, dequant_count, added_nodes_count, 1.0f);
 }
 
+static const std::initializer_list<std::string> variable_names_elementwise_add =
+    {"a", "b", "c", "d", "e", "f"};
+
+ProgramDesc BuildProgramDescElementwiseAdd() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_elementwise_add) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
+  SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
+  SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
+        true);
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+
+  return prog;
+}
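For orientation, the program built above is a->Dequantize1->b, c->Dequantize2->d, (b,d)->ElementwiseAdd->e, e->Dropout->f, with only the elementwise_add marked use_quantizer = true.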
+void MainTestElementwiseAdd(const ProgramDesc& prog, int elementwise_add_count,
+                            int quant_count, int dequant_count,
+                            int added_nodes_count, float scale,
+                            bool output_scale_missing = false,
+                            bool unsigned_and_signed_input = false) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names_elementwise_add, &original_nodes_num,
+              &current_nodes_num, output_scale_missing ? "e" : "",
+              unsigned_and_signed_input ? "b" : "");
+
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int elementwise_add_nodes_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "elementwise_add") {
+        elementwise_add_nodes_count++;
+        if (unsigned_and_signed_input) scale = 1.0f;
+        auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")), scale)
+            << "Scale_x for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")), scale)
+            << "Scale_y for node '" + op_name + "'.";
+        if (output_scale_missing) scale = 1.0f;
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")), scale)
+            << "Scale_out for node '" + op_name + "'.";
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+      }
+    }
+  }
+  EXPECT_EQ(elementwise_add_nodes_count, elementwise_add_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+TEST(CpuQuantizePass, elementwise_add) {
+  int elementwise_add_count = 1;
+  int quant_count = 2;
+  int dequant_count = 3;
+  // 2 Quant + 2 IN + 1 DeQuant + 1 OUT
+  int added_nodes_count = 6;
+  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
+                         elementwise_add_count, quant_count, dequant_count,
+                         added_nodes_count, 2.0f * 127);
+}
+
+TEST(CpuQuantizePass, elementwise_add_output_scale_missing) {
+  int elementwise_add_count = 1;
+  int quant_count = 2;
+  int dequant_count = 2;
+  // 2 Quant + 2 IN
+  int added_nodes_count = 4;
+  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
+                         elementwise_add_count, quant_count, dequant_count,
+                         added_nodes_count, 2.0f * 127, true);
+}
+
+TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) {
+  int elementwise_add_count = 1;
+  int quant_count = 0;
+  int dequant_count = 2;
+  int added_nodes_count = 0;
+  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
+                         elementwise_add_count, quant_count, dequant_count,
+                         added_nodes_count, 2.0f * 127, false, true);
+}
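To unpack the expected counts: the baseline test adds six nodes, namely two quantize ops plus their two output variables on X and Y, and one dequantize op plus its input variable on Out (hence dequant_count = 3, counting the program's own two dequantize ops). With the output scale missing, only the four input-side nodes are added and the op falls back to force_fp32_output. With mixed-signedness inputs the pass backs off entirely: nothing is added, quant_count drops to 0, and the scale attributes keep their 1.0f defaults. The expected 2.0f * 127 elsewhere is the stored per-tensor scale of 2.0 multiplied by S8_MAX.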
 }  // namespace
 }  // namespace ir
...
@@ -49,6 +49,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
   rules_["matmul"]["Y"] = ScaleAlgo::KL;
   rules_["matmul"]["Out"] = ScaleAlgo::KL;
 
+  rules_["elementwise_add"]["X"] = ScaleAlgo::KL;
+  rules_["elementwise_add"]["Y"] = ScaleAlgo::KL;
+  rules_["elementwise_add"]["Out"] = ScaleAlgo::KL;
+
   // Reshape2 does not perform calculation on the data and shapes are not
   // changed. Scale is calculated on input data and assigned to Quantize and
   // Dequantize scale.
...
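These defaults make the quantizer calibrate all three elementwise_add tensors with the KL-divergence algorithm, matching the existing matmul rules. The config class also exposes a per-tensor override; a sketch assuming the public MkldnnQuantizerConfig API (SetScaleAlgo and ScaleAlgo::MAX are recalled here, not quoted from this commit):

    #include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h"

    // Assumed API sketch: switch the elementwise_add output to max-abs
    // calibration instead of KL, e.g. when calibration time matters.
    void ExampleOverride(paddle::MkldnnQuantizerConfig* cfg) {
      cfg->SetScaleAlgo("elementwise_add", "Out", paddle::ScaleAlgo::MAX);
    }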