Add scale-matmul fuse pass (#23734)

12ba05ce · joanna.wozna.intel · GitHub · 96add8a4 · 12ba05ce · 12ba05ce
8 changed files
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -92,6 +92,7 @@ if(WITH_MKLDNN)
    pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn)
    pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
    pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
+    pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
    pass_library(fc_mkldnn_pass inference DIR mkldnn)
    pass_library(cpu_quantize_placement_pass base DIR mkldnn)
    pass_library(cpu_quantize_pass inference DIR mkldnn)
@@ -137,6 +138,7 @@ if (WITH_MKLDNN)
    cc_test(test_conv_activation_mkldnn_fuse_pass SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc DEPS conv_activation_mkldnn_fuse_pass)
    cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass)
    cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
+    cc_test(test_scale_matmul_fuse_pass SRCS mkldnn/scale_matmul_fuse_pass_tester.cc DEPS scale_matmul_fuse_pass)
    cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
    cc_test(test_mkldnn_inplace_pass SRCS mkldnn/mkldnn_inplace_pass_tester.cc DEPS mkldnn_inplace_pass)
    cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)

--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1581,6 +1581,21 @@ PDNode *patterns::MatmulDequant::operator()() {
  return dequant_out;
 }

+// Builds the scale -> matmul pattern: a "scale" op reading `scale_in` (its
+// "X" input) and writing `scale_out` (its "Out" output), which is in turn an
+// input of a "matmul" op. Returns the matmul op node.
+PDNode *patterns::ScaleMatmul::operator()() {
+  auto *in_var = pattern->NewNode(scale_in_repr());
+  in_var->AsInput()->assert_is_op_input("scale", "X");
+
+  auto *scale = pattern->NewNode(scale_op_repr());
+  scale->assert_is_op("scale");
+
+  auto *out_var = pattern->NewNode(scale_out_repr());
+  out_var->AsOutput()->assert_is_op_output("scale", "Out");
+
+  auto *matmul = pattern->NewNode(matmul_op_repr());
+  matmul->assert_is_op("matmul");
+
+  // Wire the chain: scale_in -> scale -> scale_out -> matmul.
+  scale->LinksFrom({in_var});
+  scale->LinksTo({out_var});
+  matmul->LinksFrom({out_var});
+  return matmul;
+}
+
 PDNode *patterns::PriorBox::operator()() {
  auto prior_box_op =
      pattern->NewNode(prior_box_op_repr())->assert_is_op("prior_box");

--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -972,6 +972,18 @@ struct MatmulDequant : public PatternBase {
  PATTERN_DECL_NODE(dequant_out);
 };

+// Scale + Matmul: a scale op whose output is an input of a matmul op
+struct ScaleMatmul : public PatternBase {
+  ScaleMatmul(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "scale_matmul") {}
+
+  PDNode* operator()();  // builds the pattern and returns a PDNode*
+  PATTERN_DECL_NODE(scale_in);   // pattern node: "X" input of the scale op
+  PATTERN_DECL_NODE(scale_op);   // pattern node: the scale operator
+  PATTERN_DECL_NODE(scale_out);  // pattern node: "Out" output of the scale op
+  PATTERN_DECL_NODE(matmul_op);  // pattern node: the matmul operator
+};
+
 // PriorBox operator
 // operator: prior_box_op
 // inputs: prior_box_input, prior_box_image

--- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+using string::PrettyLogDetail;
+
+// Fuses a bias-free scale op into the matmul op that consumes its output:
+// the scale factor is multiplied into matmul's "alpha" attribute, matmul is
+// rewired to read the scale op's input, and the scale op together with its
+// output variable is removed from the graph.
+//
+// A match is left untouched when the scale op has a non-zero bias or when the
+// scale output has consumers other than the matched matmul.
+void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(graph,
+                          platform::errors::InvalidArgument(
+                              "Pointer to graph argument should not be NULL."));
+
+  FusePassBase::Init("scale_matmul_fuse_pass", graph);
+  GraphPatternDetector gpd;
+  patterns::ScaleMatmul scale_matmul_pattern{gpd.mutable_pattern(),
+                                             "scale_matmul"};
+  scale_matmul_pattern();
+
+  int found_scale_matmul_fuse_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(scale_in, scale_in, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(scale_op, scale_op, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(scale_out, scale_out, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, scale_matmul_pattern);
+
+    // scale_out is removed below, so fusing is only valid when the matched
+    // matmul is its single consumer. Any other reader of scale_out would be
+    // left referring to a variable that no longer has a producer.
+    if (scale_out->outputs.size() != 1) return;
+
+    // A non-zero bias cannot be folded into matmul's alpha; skip the match.
+    if (scale_op->Op()->GetAttrIfExists<float>("bias") != 0.0) return;
+
+    auto matmul_alpha = matmul_op->Op()->GetAttrIfExists<float>("alpha");
+    auto scale_scale = scale_op->Op()->GetAttrIfExists<float>("scale");
+    PADDLE_ENFORCE_GT(matmul_alpha, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Alpha of matmul op should have positive value"));
+    PADDLE_ENFORCE_GT(scale_scale, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Scale of scale op should have positive value"));
+
+    // Find which matmul input slot is fed by the scale output.
+    std::string matmul_op_input_name;
+    for (const auto& name : matmul_op->Op()->InputNames()) {
+      for (const auto& input_name : matmul_op->Op()->Input(name)) {
+        if (input_name == scale_out->Name()) matmul_op_input_name = name;
+      }
+    }
+
+    PADDLE_ENFORCE_NE(
+        matmul_op_input_name.empty(), true,
+        platform::errors::NotFound("Operator after scale operator "
+                                   "should have scale output as input"));
+    // Fold the scale factor into alpha and bypass the scale op.
+    matmul_op->Op()->SetAttr("alpha", matmul_alpha * scale_scale);
+    matmul_op->Op()->SetInput(matmul_op_input_name,
+                              std::vector<std::string>({scale_in->Name()}));
+    IR_NODE_LINK_TO(scale_in, matmul_op);
+    GraphSafeRemoveNodes(graph, {scale_op, scale_out});
+    found_scale_matmul_fuse_count++;
+  };
+  gpd(graph, handler);
+  AddStatis(found_scale_matmul_fuse_count);
+  PrettyLogDetail("---    fused %d scale with matmul",
+                  found_scale_matmul_fuse_count);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(scale_matmul_fuse_pass,
+              paddle::framework::ir::ScaleMatmulFusePass);
--- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Graph pass that fuses a scale op into the matmul op consuming its output.
+// The transformation itself is implemented in ApplyImpl.
+class ScaleMatmulFusePass : public FusePassBase {
+ public:
+  virtual ~ScaleMatmulFusePass() = default;
+
+ protected:
+  // Runs the fusion on `graph`.
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h"
+#include <gtest/gtest.h>
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Appends an operator of the given `type` ("scale" or "matmul") to block 0 of
+// `prog`. For "scale", `scale` and `bias` become the op attributes; for
+// "matmul", `scale` is stored as the "alpha" attribute and a second input, if
+// given, becomes "Y". Any other type reports a test failure.
+void SetOp(ProgramDesc* prog, const std::string& type,
+           const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs, float scale = 1.0f,
+           float bias = 0.0f) {
+  const bool is_scale = (type == "scale");
+  const bool is_matmul = (type == "matmul");
+
+  auto* op = prog->MutableBlock(0)->AppendOp();
+  op->SetType(type);
+  if (!is_scale && !is_matmul) {
+    FAIL() << "Unexpected operator type.";
+  }
+
+  // Both supported operators take their first input as "X".
+  op->SetInput("X", {inputs[0]});
+  if (is_scale) {
+    op->SetAttr("scale", scale);
+    op->SetAttr("bias", bias);
+  } else {
+    if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
+    op->SetAttr("alpha", scale);
+  }
+  op->SetOutput("Out", {outputs[0]});
+}
+
+// Builds a two-op program:
+//   a -> scale -> b
+//   (b, c) -> matmul -> d
+// `scale` and `bias` configure the scale op; `alpha` configures the matmul.
+ProgramDesc BuildProgramDesc(float scale, float bias, float alpha) {
+  ProgramDesc prog;
+
+  auto* block = prog.MutableBlock(0);
+  for (const char* var_name : {"a", "b", "c", "d"}) {
+    block->Var(var_name);
+  }
+
+  SetOp(&prog, "scale", {"a"}, {"b"}, scale, bias);
+  SetOp(&prog, "matmul", {"b", "c"}, {"d"}, alpha);
+  return prog;
+}
+
+// Applies scale_matmul_fuse_pass to a graph built from `prog` and checks the
+// result.
+//   removed_nodes_count - number of graph nodes the pass is expected to remove
+//   scale_in_out        - expected {X, Out} names of a remaining scale op
+//                         (may be empty when the scale op must be fused away)
+//   matmul_in_out       - expected {X, Y, Out} names of the matmul op
+//   alpha               - expected "alpha" attribute of the matmul op
+void MainTest(const ProgramDesc& prog, int removed_nodes_count,
+              const std::vector<std::string>& scale_in_out,
+              const std::vector<std::string>& matmul_in_out, float alpha) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  const int original_nodes_num = graph->Nodes().size();
+  auto pass = PassRegistry::Instance().Get("scale_matmul_fuse_pass");
+  graph.reset(pass->Apply(graph.release()));
+  const int current_nodes_num = graph->Nodes().size();
+
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsOp()) continue;
+
+    auto* op = node->Op();
+    if (op->Type() == "scale") {
+      // A surviving scale op must have been anticipated by the caller; fail
+      // cleanly rather than index past the end of the expectations.
+      ASSERT_GE(scale_in_out.size(), 2UL);
+      EXPECT_EQ(op->Input("X")[0], scale_in_out[0]);
+      EXPECT_EQ(op->Output("Out")[0], scale_in_out[1]);
+    } else if (op->Type() == "matmul") {
+      ASSERT_GE(matmul_in_out.size(), 3UL);
+      EXPECT_EQ(op->Input("X")[0], matmul_in_out[0]);
+      EXPECT_EQ(op->Input("Y")[0], matmul_in_out[1]);
+      EXPECT_EQ(op->Output("Out")[0], matmul_in_out[2]);
+      EXPECT_EQ(op->GetAttrIfExists<float>("alpha"), alpha);
+    }
+  }
+  EXPECT_EQ(original_nodes_num - removed_nodes_count, current_nodes_num);
+}
+
+// With zero bias the scale op and its output variable are expected to be
+// removed; matmul then reads "a" directly and its alpha becomes scale * alpha.
+TEST(ScaleMatmulFusePass, scale_matmul_with_no_bias) {
+  const float scale_factor = 2.34f;
+  const float matmul_alpha = 3.45f;
+  const float zero_bias = 0.0f;
+  const int expected_removed_nodes = 2;
+
+  const ProgramDesc prog =
+      BuildProgramDesc(scale_factor, zero_bias, matmul_alpha);
+  MainTest(prog, expected_removed_nodes, {}, {"a", "c", "d"},
+           scale_factor * matmul_alpha);
+}
+
+// With a non-zero bias nothing is expected to change: the scale op stays in
+// place, matmul keeps reading "b", and its alpha is unchanged.
+TEST(ScaleMatmulFusePass, scale_matmul_with_bias) {
+  const float scale_factor = 2.34f;
+  const float matmul_alpha = 3.45f;
+  const float nonzero_bias = 1.0f;
+  const int expected_removed_nodes = 0;
+
+  const ProgramDesc prog =
+      BuildProgramDesc(scale_factor, nonzero_bias, matmul_alpha);
+  MainTest(prog, expected_removed_nodes, {"a", "b"}, {"b", "c", "d"},
+           matmul_alpha);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(scale_matmul_fuse_pass);
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -195,6 +195,7 @@ void CpuPassStrategy::EnableMKLDNN() {
             "conv_leaky_relu_mkldnn_fuse_pass",  //
             "conv_relu6_mkldnn_fuse_pass",       //
             "conv_swish_mkldnn_fuse_pass",       //
+             "scale_matmul_fuse_pass",            //
             // Disabled due to topology-dependent speed-up
             // "fc_mkldnn_pass",
             "mkldnn_inplace_pass",  // This pass should be activated after

--- a/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py
@@ -470,7 +470,7 @@ class Qat2Int8MkldnnPass(object):
        if self._debug:
            graph.draw('.', 'qat_int8_{}'.format(ir_pass.type()),
                       graph.all_op_nodes())
-
+        graph = self._apply_pass(graph, 'scale_matmul_fuse_pass')
        graph = self._apply_pass(
            graph, 'cpu_quantize_pass', ['quant_var_scales', 'data_layout'],
            [self._var_quant_scales, self._get_data_layout()])