diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 16dfc90d27e6a6087a751e0172cbe84e7d377dca..0107f5976499ce3d29673c5203809390e7da3d8c 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -188,4 +188,6 @@ endif()
   cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
   cc_test(test_multi_gru_fuse_pass SRCS mkldnn/multi_gru_fuse_pass_tester.cc DEPS multi_gru_fuse_pass)
   cc_test(test_multi_gru_seq_fuse_pass SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc DEPS multi_gru_seq_fuse_pass)
+  set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass mkldnn_placement_pass)
+  cc_test(test_fc_rnn_mkldnn_fuse_pass SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc DEPS ${TEST_FC_RNN_PASS_DEPS})
 endif ()
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
index b1c62d40d4d7c7ea00528a35fde7eba5d80185f6..921e1ea513961d062661fe044bdadbcfb8210f22 100644
--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
@@ -47,8 +47,9 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
   gru_pattern(fc_out);
 
   // Create New OpDesc
-  auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h,
-                         Node* bias, Node* hidden, Node* fc_bias) {
+  auto gru_creator = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h,
+                         Node* bias, Node* hidden, Node* fc_bias,
+                         const bool use_mkldnn) {
     OpDesc op_desc;
     op_desc.SetType("fusion_gru");
 
@@ -67,6 +68,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
                     gru->Op()->GetAttrIfExists<bool>("origin_mode"));
     // TODO(TJ): This should be a option for infer
     op_desc.SetAttr("use_seq", true);
+    op_desc.SetAttr("use_mkldnn", use_mkldnn);
     op_desc.SetAttr("activation", gru->Op()->GetAttr("activation"));
     op_desc.SetAttr("gate_activation", gru->Op()->GetAttr("gate_activation"));
 
@@ -149,6 +151,11 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
       LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True.";
       return;
     }
+    const bool use_mkldnn =
+        (mul->Op()->GetAttrIfExists<bool>("use_mkldnn") &&
+         gru->Op()->GetAttrIfExists<std::string>("activation") == "tanh" &&
+         gru->Op()->GetAttrIfExists<std::string>("gate_activation") ==
+             "sigmoid");
 
     if (with_fc_bias) {
       GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
@@ -156,14 +163,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
       GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern);
 
-      gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias);
+      gru_creator(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, gru, elementwise_add, fc_out, mul_out, BatchGate,
            BatchResetHiddenPrev, BatchHidden});
       GraphSafeRemoveNodes(graph, marked_nodes);
     } else {
-      gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr);
+      gru_creator(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden});
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
index 70351b8aafffa1a42c4ac4c3cd281f230ef956c8..6ec47fae26a932b26147b9811dd9d9a54cc1cccc 100644
--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
@@ -12,77 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
-
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
 
 namespace paddle {
 namespace framework {
 namespace ir {
 
-void AddVarToScope(Scope* param_scope, const std::string& name,
-                   const DDim& dims) {
-  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
-  tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
-}
-
-Scope* CreateParamScope() {
-  auto param_scope = new Scope();
-  AddVarToScope(param_scope, "gru_fc_w", {});
-  AddVarToScope(param_scope, "gru_fc_b", {});
-  AddVarToScope(param_scope, "gru_w", {});
-  AddVarToScope(param_scope, "gru_b", {});
-  AddVarToScope(param_scope, "gru_batch_gate_0", {});
-  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {});
-  AddVarToScope(param_scope, "gru_batch_hidden_0", {});
-  AddVarToScope(param_scope, "gru_hidden_0", {});
-  AddVarToScope(param_scope, "gru_batch_gate_1", {});
-  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {});
-  AddVarToScope(param_scope, "gru_batch_hidden_1", {});
-  AddVarToScope(param_scope, "gru_hidden_1", {});
-  return param_scope;
-}
-
-TEST(FCFusePass, basic) {
-  // inputs                     operator            output
-  // --------------------------------------------------------
-  // (a, gru_fc_w)              mul              -> fc_0_tmp_0
-  // (fc_0_tmp_0, gru_fc_b)     elementwise_add  -> fc_0_tmp_1
-  // (fc_0_tmp_1,gru_w,gru_b    gru              -> gru_out_0
-
-  // (b, gru_fc_w)              mul              -> fc_1_tmp_0
-  // (fc_1_tmp_0, gru_fc_b)     elementwise_add  -> fc_1_tmp_1
-  // (fc_1_tmp_1,gru_w,gru_b)   gru              -> gru_out_1
-  Layers layers;
-  auto* a = layers.data("a");
-  auto* b = layers.data("b");
-  auto* fc_w = layers.data("gru_fc_w", {}, true);
-  auto* fc_b = layers.data("gru_fc_b", {}, true);
-  auto* gru_w = layers.data("gru_w", {}, true);
-  auto* gru_b = layers.data("gru_b", {}, true);
-  auto* fc_0_tmp0 = layers.mul(a, fc_w);
-  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
-  auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false);
-  auto* gru_batch_reset_hidden_prev_0 =
-      layers.data("gru_batch_reset_hidden_prev_0", {}, false);
-  auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false);
-  auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false);
-  layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0,
-             gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0);
-
-  auto* fc_1_tmp0 = layers.mul(b, fc_w);
-  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
-  auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false);
-  auto* gru_batch_reset_hidden_prev_1 =
-      layers.data("gru_batch_reset_hidden_prev_1", {}, false);
-  auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false);
-  auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false);
-  layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1,
-             gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1);
-
-  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+namespace fc_gru_test {
+TEST(FcGruFusePass, basic) {
+  std::unique_ptr<ir::Graph> graph = PrepareGraph();
   auto pass = PassRegistry::Instance().Get("fc_gru_fuse_pass");
   pass->Set("use_gpu", new bool(true));
   graph->Set("__param_scope__", CreateParamScope());
@@ -109,6 +47,7 @@ TEST(FCFusePass, basic) {
                         "expectations after fuse"));
 }
 
+}  // namespace fc_gru_test
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h
new file mode 100644
index 0000000000000000000000000000000000000000..a862755d604e44754f0905bb5f4c53d91daeadaf
--- /dev/null
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+namespace fc_gru_test {
+void AddVarToScope(Scope* param_scope, const std::string& name,
+                   const DDim& dims) {
+  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
+  tensor->Resize(dims);
+  tensor->mutable_data<float>(platform::CPUPlace());
+}
+
+Scope* CreateParamScope() {
+  auto param_scope = new Scope();
+  AddVarToScope(param_scope, "gru_fc_w", {});
+  AddVarToScope(param_scope, "gru_fc_b", {});
+  AddVarToScope(param_scope, "gru_w", {});
+  AddVarToScope(param_scope, "gru_b", {});
+  AddVarToScope(param_scope, "gru_batch_gate_0", {});
+  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {});
+  AddVarToScope(param_scope, "gru_batch_hidden_0", {});
+  AddVarToScope(param_scope, "gru_hidden_0", {});
+  AddVarToScope(param_scope, "gru_batch_gate_1", {});
+  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {});
+  AddVarToScope(param_scope, "gru_batch_hidden_1", {});
+  AddVarToScope(param_scope, "gru_hidden_1", {});
+  return param_scope;
+}
+
+std::unique_ptr<ir::Graph> PrepareGraph(
+    std::string activation = "tanh", std::string gate_activation = "sigmoid") {
+  // inputs                     operator            output
+  // --------------------------------------------------------
+  // (a, gru_fc_w)              mul              -> fc_0_tmp_0
+  // (fc_0_tmp_0, gru_fc_b)     elementwise_add  -> fc_0_tmp_1
+  // (fc_0_tmp_1,gru_w,gru_b    gru              -> gru_out_0
+
+  // (b, gru_fc_w)              mul              -> fc_1_tmp_0
+  // (fc_1_tmp_0, gru_fc_b)     elementwise_add  -> fc_1_tmp_1
+  // (fc_1_tmp_1,gru_w,gru_b)   gru              -> gru_out_1
+  Layers layers;
+  auto* a = layers.data("a");
+  auto* b = layers.data("b");
+  auto* fc_w = layers.data("gru_fc_w", {}, true);
+  auto* fc_b = layers.data("gru_fc_b", {}, true);
+  auto* gru_w = layers.data("gru_w", {}, true);
+  auto* gru_b = layers.data("gru_b", {}, true);
+  auto* fc_0_tmp0 = layers.mul(a, fc_w);
+  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
+  auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false);
+  auto* gru_batch_reset_hidden_prev_0 =
+      layers.data("gru_batch_reset_hidden_prev_0", {}, false);
+  auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false);
+  auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false);
+  layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0,
+             gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0,
+             nullptr, false, false, activation, gate_activation);
+
+  auto* fc_1_tmp0 = layers.mul(b, fc_w);
+  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
+  auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false);
+  auto* gru_batch_reset_hidden_prev_1 =
+      layers.data("gru_batch_reset_hidden_prev_1", {}, false);
+  auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false);
+  auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false);
+  layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1,
+             gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1,
+             nullptr, false, false, activation, gate_activation);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+}  // namespace fc_gru_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
index 1c1289124506ab4e3b1baf74211bea370c144380..6bd956ef0d53c989106157b54770d10156a2cefc 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
@@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
   // Create New OpDesc
   auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x,
                           Node* weight_h, Node* bias, Node* hidden, Node* cell,
-                          Node* xx, Node* fc_bias) {
+                          Node* xx, Node* fc_bias, const bool use_mkldnn) {
     OpDesc op_desc;
     op_desc.SetType("fusion_lstm");
 #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
@@ -88,6 +88,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     op_desc.SetOutput("XX", {xx->Name()});
     op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
     op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
+    op_desc.SetAttr("use_mkldnn", use_mkldnn);
     // TODO(TJ): get from attr
     op_desc.SetAttr("use_seq", true);
 
@@ -148,13 +149,22 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
+    const bool use_mkldnn =
+        (mul->Op()->GetAttrIfExists<bool>("use_mkldnn") &&
+         lstm->Op()->GetAttrIfExists<std::string>("gate_activation") ==
+             "sigmoid" &&
+         lstm->Op()->GetAttrIfExists<std::string>("cell_activation") ==
+             "tanh" &&
+         lstm->Op()->GetAttrIfExists<std::string>("candidate_activation") ==
+             "tanh");
+
     if (with_fc_bias) {
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
       lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
-                   fc_bias);
+                   fc_bias, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, lstm, elementwise_add, mul_out, BatchGate, BatchCellPreAct});
@@ -162,7 +172,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     } else {
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
       lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
-                   nullptr);
+                   nullptr, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, lstm, BatchGate, BatchCellPreAct});
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
index 0de8d4684fecd45fd05e579b82b1f7ada11592dd..92de86e52bc0a55fd7258f6b65002d875f69049b 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
@@ -12,77 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
-
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
 
 namespace paddle {
 namespace framework {
 namespace ir {
 
-void AddVarToScope(Scope* param_scope, const std::string& name,
-                   const DDim& dims) {
-  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
-  tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
-}
-
-Scope* CreateParamScope() {
-  auto param_scope = new Scope();
-  AddVarToScope(param_scope, "lstm_fc_w", {});
-  AddVarToScope(param_scope, "lstm_fc_b", {});
-  AddVarToScope(param_scope, "lstm_w", {});
-  AddVarToScope(param_scope, "lstm_b", {});
-  AddVarToScope(param_scope, "lstm_cell_0", {});
-  AddVarToScope(param_scope, "lstm_batch_gate_0", {});
-  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {});
-  AddVarToScope(param_scope, "lstm_hidden_0", {});
-  AddVarToScope(param_scope, "lstm_cell_1", {});
-  AddVarToScope(param_scope, "lstm_batch_gate_1", {});
-  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {});
-  AddVarToScope(param_scope, "lstm_hidden_1", {});
-  return param_scope;
-}
-
-TEST(FCLSTMFusePass, basic) {
-  // inputs                      operator            output
-  // --------------------------------------------------------
-  // (a, lstm_fc_w)              mul              -> fc_0_tmp_0
-  // (fc_0_tmp_0, lstm_fc_b)     elementwise_add  -> fc_0_tmp_1
-  // fc_0_tmp_1,lstm_w,lstm_b    lstm             -> lstm_out_0
-
-  // (b, lstm_fc_w)              mul              -> fc_1_tmp_0
-  // (fc_1_tmp_0, lstm_fc_b)     elementwise_add  -> fc_1_tmp_1
-  // (fc_1_tmp_1,lstm_w,lstm_b)  lstm             -> lstm_out_1
-  Layers layers;
-  auto* a = layers.data("a");
-  auto* b = layers.data("b");
-  auto* fc_w = layers.data("lstm_fc_w", {}, true);
-  auto* fc_b = layers.data("lstm_fc_b", {}, true);
-  auto* lstm_w = layers.data("lstm_w", {}, true);
-  auto* lstm_b = layers.data("lstm_b", {}, true);
-  auto* fc_0_tmp0 = layers.mul(a, fc_w);
-  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
-  auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false);
-  auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false);
-  auto* lstm_batch_cell_pre_gate_0 =
-      layers.data("lstm_batch_cell_pre_gate_0", {}, false);
-  auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false);
-  layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0,
-              lstm_hidden_0, lstm_batch_cell_pre_gate_0);
+namespace fc_lstm_test {
 
-  auto* fc_1_tmp0 = layers.mul(b, fc_w);
-  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
-  auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false);
-  auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false);
-  auto* lstm_batch_cell_pre_gate_1 =
-      layers.data("lstm_batch_cell_pre_gate_1", {}, false);
-  auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
-  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
-              lstm_hidden_1, lstm_batch_cell_pre_gate_1);
-
-  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+TEST(FcLstmFusePass, basic) {
+  std::unique_ptr<ir::Graph> graph = PrepareGraph();
   auto pass = PassRegistry::Instance().Get("fc_lstm_fuse_pass");
   pass->Set("use_gpu", new bool(false));
   graph->Set("__param_scope__", CreateParamScope());
@@ -108,7 +47,7 @@ TEST(FCLSTMFusePass, basic) {
                         "The number of fusion_gru nodes does "
                         "not meet expectations after fuse"));
 }
-
+}  // namespace fc_lstm_test
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h
new file mode 100644
index 0000000000000000000000000000000000000000..f681a2b7ff8eb02bf7a546daa2edefbdfcdc9539
--- /dev/null
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+namespace fc_lstm_test {
+
+void AddVarToScope(Scope* param_scope, const std::string& name,
+                   const DDim& dims) {
+  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
+  tensor->Resize(dims);
+  tensor->mutable_data<float>(platform::CPUPlace());
+}
+
+Scope* CreateParamScope() {
+  auto param_scope = new Scope();
+  AddVarToScope(param_scope, "lstm_fc_w", {});
+  AddVarToScope(param_scope, "lstm_fc_b", {});
+  AddVarToScope(param_scope, "lstm_w", {});
+  AddVarToScope(param_scope, "lstm_b", {});
+  AddVarToScope(param_scope, "lstm_cell_0", {});
+  AddVarToScope(param_scope, "lstm_batch_gate_0", {});
+  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {});
+  AddVarToScope(param_scope, "lstm_hidden_0", {});
+  AddVarToScope(param_scope, "lstm_cell_1", {});
+  AddVarToScope(param_scope, "lstm_batch_gate_1", {});
+  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {});
+  AddVarToScope(param_scope, "lstm_hidden_1", {});
+  return param_scope;
+}
+
+std::unique_ptr<ir::Graph> PrepareGraph(
+    std::string gate_activation = "sigmoid",
+    std::string cell_activation = "tanh",
+    std::string candidate_activation = "tanh") {
+  // inputs                      operator            output
+  // --------------------------------------------------------
+  // (a, lstm_fc_w)              mul              -> fc_0_tmp_0
+  // (fc_0_tmp_0, lstm_fc_b)     elementwise_add  -> fc_0_tmp_1
+  // fc_0_tmp_1,lstm_w,lstm_b    lstm             -> lstm_out_0
+
+  // (b, lstm_fc_w)              mul              -> fc_1_tmp_0
+  // (fc_1_tmp_0, lstm_fc_b)     elementwise_add  -> fc_1_tmp_1
+  // (fc_1_tmp_1,lstm_w,lstm_b)  lstm             -> lstm_out_1
+  Layers layers;
+  auto* a = layers.data("a");
+  auto* b = layers.data("b");
+  auto* fc_w = layers.data("lstm_fc_w", {}, true);
+  auto* fc_b = layers.data("lstm_fc_b", {}, true);
+  auto* lstm_w = layers.data("lstm_w", {}, true);
+  auto* lstm_b = layers.data("lstm_b", {}, true);
+  auto* fc_0_tmp0 = layers.mul(a, fc_w);
+  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
+  auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false);
+  auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false);
+  auto* lstm_batch_cell_pre_gate_0 =
+      layers.data("lstm_batch_cell_pre_gate_0", {}, false);
+  auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false);
+  layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0,
+              lstm_hidden_0, lstm_batch_cell_pre_gate_0, nullptr, nullptr, true,
+              false, gate_activation, cell_activation, candidate_activation);
+  auto* fc_1_tmp0 = layers.mul(b, fc_w);
+  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
+  auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false);
+  auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false);
+  auto* lstm_batch_cell_pre_gate_1 =
+      layers.data("lstm_batch_cell_pre_gate_1", {}, false);
+  auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
+  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
+              lstm_hidden_1, lstm_batch_cell_pre_gate_1, nullptr, nullptr, true,
+              false, gate_activation, cell_activation, candidate_activation);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+
+}  // namespace fc_lstm_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 064da3d941602ee0e4f868fb0dbda305102da32b..3476ce8610ee34e17dfb6fa04d643cf9449ee154 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2262,11 +2262,11 @@ PDNode *patterns::QuantizePlacement::operator()(
 PDNode *patterns::Bfloat16Placement::operator()(
     const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"concat", "conv2d", "conv2d_transpose",
-                                       "elementwise_add", "elementwise_mul",
-                                       "fc", "fusion_gru", "gelu", "layer_norm",
-                                       "matmul", "pool2d", "relu", "reshape2",
-                                       "softmax", "sum", "transpose2"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "conv2d_transpose", "elementwise_add",
+           "elementwise_mul", "fc", "fusion_gru", "fusion_lstm", "gelu",
+           "layer_norm", "matmul", "pool2d", "relu", "reshape2", "softmax",
+           "sum", "transpose2"});
   if (!bfloat16_enabled_op_types.empty()) {
     supported_op_types = bfloat16_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c4770a322db50c495f9d47aba3d338615fa36219
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void TestFcRNNFusePass(const std::string& pass_name,
+                       std::string activation = "tanh",
+                       std::string gate_activation = "sigmoid",
+                       std::string candidate_activation = "tanh") {
+  std::unique_ptr<ir::Graph> graph =
+      (pass_name == "fc_gru_fuse_pass"
+           ? fc_gru_test::PrepareGraph(activation, gate_activation)
+           : fc_lstm_test::PrepareGraph(gate_activation, activation,
+                                        candidate_activation));
+  auto mkldnn_placement_pass_ =
+      PassRegistry::Instance().Get("mkldnn_placement_pass");
+  mkldnn_placement_pass_->Set("mkldnn_enabled_op_types",
+                              new std::unordered_set<std::string>({}));
+  graph->Set("__param_scope__", (pass_name == "fc_gru_fuse_pass"
+                                     ? fc_gru_test::CreateParamScope()
+                                     : fc_lstm_test::CreateParamScope()));
+  graph.reset(mkldnn_placement_pass_->Apply(graph.release()));
+
+  auto check_num_mkldnn_nodes = [&](const std::unique_ptr<ir::Graph>& graph) {
+    int nodes_cout = 0;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsOp()) {
+        auto* op = node->Op();
+        if (op->GetAttrIfExists<bool>("use_mkldnn")) nodes_cout++;
+      }
+    }
+    return nodes_cout;
+  };
+  int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph);
+  int removed_mkldnn_nodes = 2;
+
+  // OneDNN fusion_gru and fusion_lstm supports only sigmoid as a gate
+  // activation and tanh as an activation and candidate_activation
+  if (activation != "tanh" || gate_activation != "sigmoid" ||
+      candidate_activation != "tanh")
+    removed_mkldnn_nodes += 2;
+
+  auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name);
+  graph.reset(fc_rnn_fuse_pass_->Apply(graph.release()));
+  int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph);
+
+  PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before - removed_mkldnn_nodes,
+                    num_mkldnn_nodes_after,
+                    platform::errors::PreconditionNotMet(
+                        "The number of nodes with \"use_mkldnn\" attr after "
+                        "passes is not as expected"));
+}
+
+TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); }
+
+TEST(FcGruFusePass, gru_unsupported_activations) {
+  TestFcRNNFusePass("fc_gru_fuse_pass", "relu", "sigmoid");
+}
+
+TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); }
+
+TEST(FcLstmFusePass, lstm_unsupported_activations) {
+  TestFcRNNFusePass("fc_lstm_fuse_pass", "tanh", "relu", "tanh");
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_placement_pass);
+USE_PASS(fc_gru_fuse_pass);
+USE_PASS(fc_lstm_fuse_pass);
diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h
index 850d3dca6d0e10dd2f93a2149bef268042de339b..4b6068d4776e3e12a3b735c101dd181922954374 100644
--- a/paddle/fluid/framework/ir/pass_tester_helper.h
+++ b/paddle/fluid/framework/ir/pass_tester_helper.h
@@ -194,17 +194,20 @@ struct Layers {
   }
 
   VarDesc* mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr,
-               int x_num_col_dims = 1, int y_num_col_dims = 1) {
+               int x_num_col_dims = 1, int y_num_col_dims = 1,
+               bool use_mkldnn = false) {
     AttributeMap attrs;
     attrs["x_num_col_dims"] = x_num_col_dims;
     attrs["y_num_col_dims"] = y_num_col_dims;
+    attrs["use_mkldnn"] = use_mkldnn;
     return binary_op("mul", x, y, out, &attrs);
   }
 
   VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr,
-                           int axis = -1) {
+                           int axis = -1, bool use_mkldnn = false) {
     AttributeMap attrs;
     attrs["axis"] = axis;
+    attrs["use_mkldnn"] = use_mkldnn;
     return binary_op("elementwise_add", x, y, out, &attrs);
   }
 
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index 024313837e0b63a4ff2325b9cedd75a608c2a879..720c90090cf746121ee79b44bd3c9ab35b736dba 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -38,7 +38,6 @@ void SetAnalysisConfig(AnalysisConfig *cfg,
   cfg->SwitchSpecifyInputNames(false);
   cfg->SetCpuMathLibraryNumThreads(num_threads);
   cfg->EnableMKLDNN();
-  cfg->pass_builder()->AppendPass("mkldnn_placement_pass");
 }
 
 std::vector<size_t> ReadSentenceLod(std::ifstream &file, size_t offset,
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc
index 6cca6b5a9729a7065e64771ec6bfb2b1cbb52cf5..42bf784b2af4fbcb1cde36d995f1152f0e31635b 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -249,6 +249,11 @@ void FusionLSTMOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<std::string>(
+      "mkldnn_data_type",
+      "(string, default \"float32\"). Data type of mkldnn kernel")
+      .SetDefault("float32")
+      .InEnum({"float32", "int8", "bfloat16"});
   AddAttr<float>("Scale_data",
                  "Scale to be used for int8 input/output data."
                  "Only used with MKL-DNN INT8.")
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
index 7320efd259f459875b1ef37d89d6b316dc1efeac..fa9a93452dffdeb39d7cad7cebe2e6b18ef526dc 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
@@ -27,7 +27,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION
                  "place does not support BF16 evaluation")
 class TestFusionGRUBF16MKLDNNOp(OpTest):
     def set_confs(self):
-        self.mkldnn_data_type = False
+        pass
 
     def test_check_output(self):
         for use_seq in {True, False}:
@@ -48,6 +48,7 @@ class TestFusionGRUBF16MKLDNNOp(OpTest):
         self.act_gate = 'sigmoid'
         self.origin_mode = False
         self.use_mkldnn = True
+        self.mkldnn_data_type = "bfloat16"
         self.force_fp32_output = False
         self.weights_dtype = 'fp32'
         self.set_confs()
@@ -113,7 +114,8 @@ class TestFusionGRUBF16MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'origin_mode': self.origin_mode,
             'force_fp32_output': self.force_fp32_output,
-            'use_mkldnn': self.use_mkldnn
+            'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
         }
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
index 2d3caf0be97c9548bfc278a093535aec113b6b6c..4fda51e9e05f48592f1f262b2ce01e6bc3d56eef 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
@@ -35,6 +35,7 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
         self.act_gate = 'sigmoid'
         self.origin_mode = True
         self.use_mkldnn = True
+        self.mkldnn_data_type = "int8"
         self.force_fp32_output = True
         self.error_margin = 1e-5
         self.set_confs()
@@ -115,6 +116,7 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'origin_mode': self.origin_mode,
             'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
             'force_fp32_output': self.force_fp32_output,
             'Scale_data': scale_data,
             'Shift_data': shift_data,
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
index d65919aa434c387348963e0a0ef00712ca91d549..d07eda3259960c15bdac576fa7506bf74db7b5e0 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
@@ -27,7 +27,7 @@ from paddle.fluid.tests.unittests.test_fusion_gru_op import fusion_gru
                  "place does not support BF16 evaluation")
 class TestFusionLSTMBF16ONEDNNOp(OpTest):
     def set_confs(self):
-        self.mkldnn_data_type = False
+        pass
 
     def test_check_output(self):
         for use_seq in {True, False}:
@@ -48,6 +48,7 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
         self.use_mkldnn = True
+        self.mkldnn_data_type = "bfloat16"
         self.force_fp32_output = False
         self.weights_dtype = 'fp32'
         self.set_confs()
@@ -130,7 +131,8 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest):
             'cell_activation': self.act_cell,
             'candidate_activation': self.act_cand,
             'force_fp32_output': self.force_fp32_output,
-            'use_mkldnn': self.use_mkldnn
+            'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
         }
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
index 93dc45f2650f531fef87d6291c48527a93d33db8..12f8c01783d9c3df7ba645a7a1c1bef45ccd84a9 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
@@ -34,6 +34,7 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest):
         self.act_cand = 'tanh'
         self.use_peepholes = False  # LSTM u8 doesn't support peepholes
         self.use_mkldnn = True
+        self.mkldnn_data_type = "int8"
         self.force_fp32_output = False
         self.error_margin = 1e-5
         self.set_confs()
@@ -117,6 +118,7 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'use_peepholes': self.use_peepholes,
             'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
             'force_fp32_output': self.force_fp32_output,
             'Scale_data': scale_data,
             'Shift_data': shift_data,