From 12ba05ce0c5c798565e761a938674f55aa856bf3 Mon Sep 17 00:00:00 2001
From: "joanna.wozna.intel"
Date: Mon, 13 Apr 2020 10:30:43 +0200
Subject: [PATCH] Add scale-matmul fuse pass (#23734)

---
 paddle/fluid/framework/ir/CMakeLists.txt      |   2 +
 .../framework/ir/graph_pattern_detector.cc    |  15 +++
 .../framework/ir/graph_pattern_detector.h     |  12 ++
 .../ir/mkldnn/scale_matmul_fuse_pass.cc       |  84 ++++++++++++++
 .../ir/mkldnn/scale_matmul_fuse_pass.h        |  33 ++++++
 .../mkldnn/scale_matmul_fuse_pass_tester.cc   | 104 ++++++++++++++++++
 .../inference/api/paddle_pass_builder.cc      |   1 +
 .../quantization/qat2_int8_mkldnn_pass.py     |   2 +-
 8 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
 create mode 100644 paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h
 create mode 100644 paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index eb1e5aca30..db78c6bb82 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -92,6 +92,7 @@ if(WITH_MKLDNN)
   pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
+  pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
   pass_library(fc_mkldnn_pass inference DIR mkldnn)
   pass_library(cpu_quantize_placement_pass base DIR mkldnn)
   pass_library(cpu_quantize_pass inference DIR mkldnn)
@@ -137,6 +138,7 @@ if (WITH_MKLDNN)
   cc_test(test_conv_activation_mkldnn_fuse_pass SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc DEPS conv_activation_mkldnn_fuse_pass)
   cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass)
   cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
+  cc_test(test_scale_matmul_fuse_pass SRCS mkldnn/scale_matmul_fuse_pass_tester.cc DEPS scale_matmul_fuse_pass)
   cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
   cc_test(test_mkldnn_inplace_pass SRCS mkldnn/mkldnn_inplace_pass_tester.cc DEPS mkldnn_inplace_pass)
   cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index b0d41e7fa3..4fbdaf3330 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1581,6 +1581,21 @@ PDNode *patterns::MatmulDequant::operator()() {
   return dequant_out;
 }
 
+PDNode *patterns::ScaleMatmul::operator()() {
+  auto scale_in = pattern->NewNode(scale_in_repr())
+                      ->AsInput()
+                      ->assert_is_op_input("scale", "X");
+  auto scale_op = pattern->NewNode(scale_op_repr())->assert_is_op("scale");
+  auto scale_out = pattern->NewNode(scale_out_repr())
+                       ->AsOutput()
+                       ->assert_is_op_output("scale", "Out");
+  auto matmul_op = pattern->NewNode(matmul_op_repr())->assert_is_op("matmul");
+
+  scale_op->LinksFrom({scale_in}).LinksTo({scale_out});
+  matmul_op->LinksFrom({scale_out});
+  return matmul_op;
+}
+
 PDNode *patterns::PriorBox::operator()() {
   auto prior_box_op =
       pattern->NewNode(prior_box_op_repr())->assert_is_op("prior_box");
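The pattern above matches a scale op whose single output feeds a matmul. The fuse pass added later in this patch rewrites each match by folding the scale factor into matmul's alpha attribute; sketched below with s and a standing for the scale and alpha attributes (illustration only, not part of the patch):

    before:  scale_in -> scale(scale=s, bias=0) -> scale_out -> matmul(alpha=a)
    after:   scale_in -> matmul(alpha=a*s)

The scale op and its output variable are removed, and matmul reads scale_in directly.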
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 5444c143bf..3139ec6ba7 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -972,6 +972,18 @@ struct MatmulDequant : public PatternBase {
   PATTERN_DECL_NODE(dequant_out);
 };
 
+// Scale + Matmul
+struct ScaleMatmul : public PatternBase {
+  ScaleMatmul(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "scale_matmul") {}
+
+  PDNode* operator()();
+  PATTERN_DECL_NODE(scale_in);
+  PATTERN_DECL_NODE(scale_op);
+  PATTERN_DECL_NODE(scale_out);
+  PATTERN_DECL_NODE(matmul_op);
+};
+
 // PriorBox operator
 // operator: prior_box_op
 // inputs: prior_box_input, prior_box_image
diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
new file mode 100644
index 0000000000..0d720e828b
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
@@ -0,0 +1,84 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h"
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+using string::PrettyLogDetail;
+
+void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(graph,
+                          platform::errors::InvalidArgument(
+                              "Pointer to graph argument should not be NULL."));
+
+  FusePassBase::Init("scale_matmul_fuse_pass", graph);
+  GraphPatternDetector gpd;
+  patterns::ScaleMatmul scale_matmul_pattern{gpd.mutable_pattern(),
+                                             "scale_matmul"};
+  scale_matmul_pattern();
+
+  int found_scale_matmul_fuse_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(scale_in, scale_in, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(scale_op, scale_op, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(scale_out, scale_out, scale_matmul_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, scale_matmul_pattern);
+
+    if (scale_op->Op()->GetAttrIfExists<float>("bias") == 0.0) {
+      auto matmul_alpha = matmul_op->Op()->GetAttrIfExists<float>("alpha");
+      auto scale_scale = scale_op->Op()->GetAttrIfExists<float>("scale");
+      PADDLE_ENFORCE_GT(matmul_alpha, 0.0f,
+                        platform::errors::InvalidArgument(
+                            "Alpha of matmul op should have positive value"));
+      PADDLE_ENFORCE_GT(scale_scale, 0.0f,
+                        platform::errors::InvalidArgument(
+                            "Scale of scale op should have positive value"));
+
+      std::string matmul_op_input_name;
+      for (auto name : matmul_op->Op()->InputNames())
+        for (auto input_name : matmul_op->Op()->Input(name))
+          if (input_name == scale_out->Name()) matmul_op_input_name = name;
+
+      PADDLE_ENFORCE_NE(
+          matmul_op_input_name.empty(), true,
+          platform::errors::NotFound("Operator after scale operator "
"should have scale output as input")); + matmul_op->Op()->SetAttr("alpha", matmul_alpha * scale_scale); + matmul_op->Op()->SetInput(matmul_op_input_name, + std::vector({scale_in->Name()})); + IR_NODE_LINK_TO(scale_in, matmul_op); + GraphSafeRemoveNodes(graph, {scale_op, scale_out}); + found_scale_matmul_fuse_count++; + } + }; + gpd(graph, handler); + AddStatis(found_scale_matmul_fuse_count); + PrettyLogDetail("--- fused %d scale with matmul", + found_scale_matmul_fuse_count); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(scale_matmul_fuse_pass, + paddle::framework::ir::ScaleMatmulFusePass); diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h new file mode 100644 index 0000000000..fe97b9681c --- /dev/null +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" + +namespace paddle { +namespace framework { +namespace ir { + +class ScaleMatmulFusePass : public FusePassBase { + public: + virtual ~ScaleMatmulFusePass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc new file mode 100644 index 0000000000..d37d014a87 --- /dev/null +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h
new file mode 100644
index 0000000000..fe97b9681c
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class ScaleMatmulFusePass : public FusePassBase {
+ public:
+  virtual ~ScaleMatmulFusePass() {}
+
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc
new file mode 100644
index 0000000000..d37d014a87
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc
@@ -0,0 +1,104 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h"
+#include <gtest/gtest.h>
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void SetOp(ProgramDesc* prog, const std::string& type,
+           const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs, float scale = 1.0f,
+           float bias = 0.0f) {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+
+  op->SetType(type);
+  if (type == "scale") {
+    op->SetInput("X", {inputs[0]});
+    op->SetAttr("scale", scale);
+    op->SetAttr("bias", bias);
+  } else if (type == "matmul") {
+    op->SetInput("X", {inputs[0]});
+    if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
+    op->SetAttr("alpha", scale);
+  } else {
+    FAIL() << "Unexpected operator type.";
+  }
+  op->SetOutput("Out", {outputs[0]});
+}
+
+// a->scale->b
+// (b,c)->matmul->d
+ProgramDesc BuildProgramDesc(float scale, float bias, float alpha) {
+  ProgramDesc prog;
+
+  for (auto& v : std::vector<std::string>({"a", "b", "c", "d"})) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "scale", {"a"}, {"b"}, scale, bias);
+  SetOp(&prog, "matmul", {"b", "c"}, {"d"}, alpha);
+  return prog;
+}
+
+void MainTest(const ProgramDesc& prog, int removed_nodes_count,
+              const std::vector<std::string> scale_in_out,
+              const std::vector<std::string> matmul_in_out, float alpha) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num = graph->Nodes().size();
+  auto pass = PassRegistry::Instance().Get("scale_matmul_fuse_pass");
+  graph.reset(pass->Apply(graph.release()));
+  int current_nodes_num = graph->Nodes().size();
+
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "scale") {
+        EXPECT_EQ(op->Input("X")[0], scale_in_out[0]);
+        EXPECT_EQ(op->Output("Out")[0], scale_in_out[1]);
+      } else if (op->Type() == "matmul") {
+        EXPECT_EQ(op->Input("X")[0], matmul_in_out[0]);
+        EXPECT_EQ(op->Input("Y")[0], matmul_in_out[1]);
+        EXPECT_EQ(op->Output("Out")[0], matmul_in_out[2]);
+        EXPECT_EQ(op->GetAttrIfExists<float>("alpha"), alpha);
+      }
+    }
+  }
+  EXPECT_EQ(original_nodes_num - removed_nodes_count, current_nodes_num);
+}
+
+TEST(ScaleMatmulFusePass, scale_matmul_with_no_bias) {
+  auto bias = 0.0f;
+  auto scale = 2.34f;
+  auto alpha = 3.45f;
+  int removed_nodes_count = 2;
+  MainTest(BuildProgramDesc(scale, bias, alpha), removed_nodes_count, {},
+           {"a", "c", "d"}, scale * alpha);
+}
+
+TEST(ScaleMatmulFusePass, scale_matmul_with_bias) {
+  auto bias = 1.0f;
+  auto scale = 2.34f;
+  auto alpha = 3.45f;
+  int removed_nodes_count = 0;
+  MainTest(BuildProgramDesc(scale, bias, alpha), removed_nodes_count,
+           {"a", "b"}, {"b", "c", "d"}, alpha);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(scale_matmul_fuse_pass);
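With WITH_MKLDNN enabled, the cc_test registration above produces a test_scale_matmul_fuse_pass target; assuming a standard CMake build directory, the two cases can be run in isolation with something like:

    ctest -R test_scale_matmul_fuse_pass

The first case (bias = 0) expects the scale op and its output b to be removed (two nodes) and alpha to become scale * alpha; the second (bias = 1) expects the graph to be left untouched.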
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index a05abf5a74..411de95d3c 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -195,6 +195,7 @@ void CpuPassStrategy::EnableMKLDNN() {
            "conv_leaky_relu_mkldnn_fuse_pass",  //
            "conv_relu6_mkldnn_fuse_pass",       //
            "conv_swish_mkldnn_fuse_pass",       //
+           "scale_matmul_fuse_pass",            //
            // Disabled due to topology-dependent speed-up
            // "fc_mkldnn_pass",
            "mkldnn_inplace_pass",  // This pass should be activated after
diff --git a/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py
index 2c91b7599d..43b173f434 100644
--- a/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py
@@ -470,7 +470,7 @@ class Qat2Int8MkldnnPass(object):
             if self._debug:
                 graph.draw('.', 'qat_int8_{}'.format(ir_pass.type()),
                            graph.all_op_nodes())
-
+        graph = self._apply_pass(graph, 'scale_matmul_fuse_pass')
         graph = self._apply_pass(
             graph, 'cpu_quantize_pass', ['quant_var_scales', 'data_layout'],
             [self._var_quant_scales, self._get_data_layout()])
-- 
GitLab