diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 16dfc90d27e6a6087a751e0172cbe84e7d377dca..0107f5976499ce3d29673c5203809390e7da3d8c 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -188,4 +188,6 @@ endif()
   cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass)
   cc_test(test_multi_gru_fuse_pass SRCS mkldnn/multi_gru_fuse_pass_tester.cc DEPS multi_gru_fuse_pass)
   cc_test(test_multi_gru_seq_fuse_pass SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc DEPS multi_gru_seq_fuse_pass)
+  set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass mkldnn_placement_pass)
+  cc_test(test_fc_rnn_mkldnn_fuse_pass SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc DEPS ${TEST_FC_RNN_PASS_DEPS})
 endif ()
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
index b1c62d40d4d7c7ea00528a35fde7eba5d80185f6..921e1ea513961d062661fe044bdadbcfb8210f22 100644
--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
@@ -47,8 +47,9 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
   gru_pattern(fc_out);
 
   // Create New OpDesc
-  auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h,
-                         Node* bias, Node* hidden, Node* fc_bias) {
+  auto gru_creator = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h,
+                         Node* bias, Node* hidden, Node* fc_bias,
+                         const bool use_mkldnn) {
     OpDesc op_desc;
     op_desc.SetType("fusion_gru");
 
@@ -67,6 +68,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
                     gru->Op()->GetAttrIfExists<bool>("origin_mode"));
     // TODO(TJ): This should be a option for infer
     op_desc.SetAttr("use_seq", true);
+    op_desc.SetAttr("use_mkldnn", use_mkldnn);
     op_desc.SetAttr("activation", gru->Op()->GetAttr("activation"));
     op_desc.SetAttr("gate_activation", gru->Op()->GetAttr("gate_activation"));
 
@@ -149,6 +151,11 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
       LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True.";
       return;
     }
+    const bool use_mkldnn =
+        (mul->Op()->GetAttrIfExists<bool>("use_mkldnn") &&
+         gru->Op()->GetAttrIfExists<std::string>("activation") == "tanh" &&
+         gru->Op()->GetAttrIfExists<std::string>("gate_activation") ==
+             "sigmoid");
 
     if (with_fc_bias) {
       GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
@@ -156,14 +163,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
       GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern);
 
-      gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias);
+      gru_creator(gru, x_n, w, Weight, Bias, Hidden, fc_bias, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, gru, elementwise_add, fc_out, mul_out, BatchGate,
            BatchResetHiddenPrev, BatchHidden});
       GraphSafeRemoveNodes(graph, marked_nodes);
     } else {
-      gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr);
+      gru_creator(gru, x_n, w, Weight, Bias, Hidden, nullptr, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden});
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
index 70351b8aafffa1a42c4ac4c3cd281f230ef956c8..6ec47fae26a932b26147b9811dd9d9a54cc1cccc 100644
--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.cc
@@ -12,77 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
-
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
 
 namespace paddle {
 namespace framework {
 namespace ir {
 
-void AddVarToScope(Scope* param_scope, const std::string& name,
-                   const DDim& dims) {
-  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
-  tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
-}
-
-Scope* CreateParamScope() {
-  auto param_scope = new Scope();
-  AddVarToScope(param_scope, "gru_fc_w", {});
-  AddVarToScope(param_scope, "gru_fc_b", {});
-  AddVarToScope(param_scope, "gru_w", {});
-  AddVarToScope(param_scope, "gru_b", {});
-  AddVarToScope(param_scope, "gru_batch_gate_0", {});
-  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {});
-  AddVarToScope(param_scope, "gru_batch_hidden_0", {});
-  AddVarToScope(param_scope, "gru_hidden_0", {});
-  AddVarToScope(param_scope, "gru_batch_gate_1", {});
-  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {});
-  AddVarToScope(param_scope, "gru_batch_hidden_1", {});
-  AddVarToScope(param_scope, "gru_hidden_1", {});
-  return param_scope;
-}
-
-TEST(FCFusePass, basic) {
-  // inputs                     operator            output
-  // --------------------------------------------------------
-  // (a, gru_fc_w)              mul              -> fc_0_tmp_0
-  // (fc_0_tmp_0, gru_fc_b)     elementwise_add  -> fc_0_tmp_1
-  // (fc_0_tmp_1,gru_w,gru_b    gru              -> gru_out_0
-
-  // (b, gru_fc_w)              mul              -> fc_1_tmp_0
-  // (fc_1_tmp_0, gru_fc_b)     elementwise_add  -> fc_1_tmp_1
-  // (fc_1_tmp_1,gru_w,gru_b)   gru              -> gru_out_1
-  Layers layers;
-  auto* a = layers.data("a");
-  auto* b = layers.data("b");
-  auto* fc_w = layers.data("gru_fc_w", {}, true);
-  auto* fc_b = layers.data("gru_fc_b", {}, true);
-  auto* gru_w = layers.data("gru_w", {}, true);
-  auto* gru_b = layers.data("gru_b", {}, true);
-  auto* fc_0_tmp0 = layers.mul(a, fc_w);
-  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
-  auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false);
-  auto* gru_batch_reset_hidden_prev_0 =
-      layers.data("gru_batch_reset_hidden_prev_0", {}, false);
-  auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false);
-  auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false);
-  layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0,
-             gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0);
-
-  auto* fc_1_tmp0 = layers.mul(b, fc_w);
-  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
-  auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false);
-  auto* gru_batch_reset_hidden_prev_1 =
-      layers.data("gru_batch_reset_hidden_prev_1", {}, false);
-  auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false);
-  auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false);
-  layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1,
-             gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1);
-
-  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+namespace fc_gru_test {
+TEST(FcGruFusePass, basic) {
+  std::unique_ptr<ir::Graph> graph = PrepareGraph();
   auto pass = PassRegistry::Instance().Get("fc_gru_fuse_pass");
   pass->Set("use_gpu", new bool(true));
   graph->Set("__param_scope__", CreateParamScope());
@@ -109,6 +47,7 @@ TEST(FCFusePass, basic) {
                         "expectations after fuse"));
 }
 
+}  // namespace fc_gru_test
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h
new file mode 100644
index 0000000000000000000000000000000000000000..a862755d604e44754f0905bb5f4c53d91daeadaf
--- /dev/null
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h"
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+namespace fc_gru_test {
+void AddVarToScope(Scope* param_scope, const std::string& name,
+                   const DDim& dims) {
+  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
+  tensor->Resize(dims);
+  tensor->mutable_data<float>(platform::CPUPlace());
+}
+
+Scope* CreateParamScope() {
+  auto param_scope = new Scope();
+  AddVarToScope(param_scope, "gru_fc_w", {});
+  AddVarToScope(param_scope, "gru_fc_b", {});
+  AddVarToScope(param_scope, "gru_w", {});
+  AddVarToScope(param_scope, "gru_b", {});
+  AddVarToScope(param_scope, "gru_batch_gate_0", {});
+  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_0", {});
+  AddVarToScope(param_scope, "gru_batch_hidden_0", {});
+  AddVarToScope(param_scope, "gru_hidden_0", {});
+  AddVarToScope(param_scope, "gru_batch_gate_1", {});
+  AddVarToScope(param_scope, "gru_batch_reset_hidden_prev_1", {});
+  AddVarToScope(param_scope, "gru_batch_hidden_1", {});
+  AddVarToScope(param_scope, "gru_hidden_1", {});
+  return param_scope;
+}
+
+std::unique_ptr<ir::Graph> PrepareGraph(
+    std::string activation = "tanh", std::string gate_activation = "sigmoid") {
+  // inputs                     operator            output
+  // --------------------------------------------------------
+  // (a, gru_fc_w)              mul              -> fc_0_tmp_0
+  // (fc_0_tmp_0, gru_fc_b)     elementwise_add  -> fc_0_tmp_1
+  // (fc_0_tmp_1,gru_w,gru_b    gru              -> gru_out_0
+
+  // (b, gru_fc_w)              mul              -> fc_1_tmp_0
+  // (fc_1_tmp_0, gru_fc_b)     elementwise_add  -> fc_1_tmp_1
+  // (fc_1_tmp_1,gru_w,gru_b)   gru              -> gru_out_1
+  Layers layers;
+  auto* a = layers.data("a");
+  auto* b = layers.data("b");
+  auto* fc_w = layers.data("gru_fc_w", {}, true);
+  auto* fc_b = layers.data("gru_fc_b", {}, true);
+  auto* gru_w = layers.data("gru_w", {}, true);
+  auto* gru_b = layers.data("gru_b", {}, true);
+  auto* fc_0_tmp0 = layers.mul(a, fc_w);
+  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
+  auto* gru_batch_gate_0 = layers.data("gru_batch_gate_0", {}, false);
+  auto* gru_batch_reset_hidden_prev_0 =
+      layers.data("gru_batch_reset_hidden_prev_0", {}, false);
+  auto* gru_batch_hidden_0 = layers.data("gru_batch_hidden_0", {}, false);
+  auto* gru_hidden_0 = layers.data("gru_hidden_0", {}, false);
+  layers.gru(fc_0_tmp1, gru_w, gru_b, gru_batch_gate_0,
+             gru_batch_reset_hidden_prev_0, gru_batch_hidden_0, gru_hidden_0,
+             nullptr, false, false, activation, gate_activation);
+
+  auto* fc_1_tmp0 = layers.mul(b, fc_w);
+  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
+  auto* gru_batch_gate_1 = layers.data("gru_batch_gate_1", {}, false);
+  auto* gru_batch_reset_hidden_prev_1 =
+      layers.data("gru_batch_reset_hidden_prev_1", {}, false);
+  auto* gru_batch_hidden_1 = layers.data("gru_batch_hidden_1", {}, false);
+  auto* gru_hidden_1 = layers.data("gru_hidden_1", {}, false);
+  layers.gru(fc_1_tmp1, gru_w, gru_b, gru_batch_gate_1,
+             gru_batch_reset_hidden_prev_1, gru_batch_hidden_1, gru_hidden_1,
+             nullptr, false, false, activation, gate_activation);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+}  // namespace fc_gru_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
index 1c1289124506ab4e3b1baf74211bea370c144380..6bd956ef0d53c989106157b54770d10156a2cefc 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
@@ -47,7 +47,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
   // Create New OpDesc
   auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x,
                           Node* weight_h, Node* bias, Node* hidden, Node* cell,
-                          Node* xx, Node* fc_bias) {
+                          Node* xx, Node* fc_bias, const bool use_mkldnn) {
     OpDesc op_desc;
     op_desc.SetType("fusion_lstm");
 #define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
@@ -88,6 +88,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     op_desc.SetOutput("XX", {xx->Name()});
     op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
     op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
+    op_desc.SetAttr("use_mkldnn", use_mkldnn);
     // TODO(TJ): get from attr
     op_desc.SetAttr("use_seq", true);
 
@@ -148,13 +149,22 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
+    const bool use_mkldnn =
+        (mul->Op()->GetAttrIfExists<bool>("use_mkldnn") &&
+         lstm->Op()->GetAttrIfExists<std::string>("gate_activation") ==
+             "sigmoid" &&
+         lstm->Op()->GetAttrIfExists<std::string>("cell_activation") ==
+             "tanh" &&
+         lstm->Op()->GetAttrIfExists<std::string>("candidate_activation") ==
+             "tanh");
+
     if (with_fc_bias) {
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
       GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
       lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
-                   fc_bias);
+                   fc_bias, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, lstm, elementwise_add, mul_out, BatchGate, BatchCellPreAct});
@@ -162,7 +172,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
     } else {
       GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
       lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
-                   nullptr);
+                   nullptr, use_mkldnn);
       // Remove unneeded nodes.
       std::unordered_set<const Node*> marked_nodes(
           {mul, lstm, BatchGate, BatchCellPreAct});
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
index 0de8d4684fecd45fd05e579b82b1f7ada11592dd..92de86e52bc0a55fd7258f6b65002d875f69049b 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.cc
@@ -12,77 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
-
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
 
 namespace paddle {
 namespace framework {
 namespace ir {
 
-void AddVarToScope(Scope* param_scope, const std::string& name,
-                   const DDim& dims) {
-  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
-  tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
-}
-
-Scope* CreateParamScope() {
-  auto param_scope = new Scope();
-  AddVarToScope(param_scope, "lstm_fc_w", {});
-  AddVarToScope(param_scope, "lstm_fc_b", {});
-  AddVarToScope(param_scope, "lstm_w", {});
-  AddVarToScope(param_scope, "lstm_b", {});
-  AddVarToScope(param_scope, "lstm_cell_0", {});
-  AddVarToScope(param_scope, "lstm_batch_gate_0", {});
-  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {});
-  AddVarToScope(param_scope, "lstm_hidden_0", {});
-  AddVarToScope(param_scope, "lstm_cell_1", {});
-  AddVarToScope(param_scope, "lstm_batch_gate_1", {});
-  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {});
-  AddVarToScope(param_scope, "lstm_hidden_1", {});
-  return param_scope;
-}
-
-TEST(FCLSTMFusePass, basic) {
-  // inputs                      operator            output
-  // --------------------------------------------------------
-  // (a, lstm_fc_w)              mul              -> fc_0_tmp_0
-  // (fc_0_tmp_0, lstm_fc_b)     elementwise_add  -> fc_0_tmp_1
-  // fc_0_tmp_1,lstm_w,lstm_b    lstm             -> lstm_out_0
-
-  // (b, lstm_fc_w)              mul              -> fc_1_tmp_0
-  // (fc_1_tmp_0, lstm_fc_b)     elementwise_add  -> fc_1_tmp_1
-  // (fc_1_tmp_1,lstm_w,lstm_b)  lstm             -> lstm_out_1
-  Layers layers;
-  auto* a = layers.data("a");
-  auto* b = layers.data("b");
-  auto* fc_w = layers.data("lstm_fc_w", {}, true);
-  auto* fc_b = layers.data("lstm_fc_b", {}, true);
-  auto* lstm_w = layers.data("lstm_w", {}, true);
-  auto* lstm_b = layers.data("lstm_b", {}, true);
-  auto* fc_0_tmp0 = layers.mul(a, fc_w);
-  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
-  auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false);
-  auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false);
-  auto* lstm_batch_cell_pre_gate_0 =
-      layers.data("lstm_batch_cell_pre_gate_0", {}, false);
-  auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false);
-  layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0,
-              lstm_hidden_0, lstm_batch_cell_pre_gate_0);
+namespace fc_lstm_test {
 
-  auto* fc_1_tmp0 = layers.mul(b, fc_w);
-  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
-  auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false);
-  auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false);
-  auto* lstm_batch_cell_pre_gate_1 =
-      layers.data("lstm_batch_cell_pre_gate_1", {}, false);
-  auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
-  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
-              lstm_hidden_1, lstm_batch_cell_pre_gate_1);
-
-  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+TEST(FcLstmFusePass, basic) {
+  std::unique_ptr<ir::Graph> graph = PrepareGraph();
   auto pass = PassRegistry::Instance().Get("fc_lstm_fuse_pass");
   pass->Set("use_gpu", new bool(false));
   graph->Set("__param_scope__", CreateParamScope());
@@ -108,7 +47,7 @@ TEST(FCLSTMFusePass, basic) {
                         "The number of fusion_gru nodes does "
                         "not meet expectations after fuse"));
 }
-
+}  // namespace fc_lstm_test
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h
new file mode 100644
index 0000000000000000000000000000000000000000..f681a2b7ff8eb02bf7a546daa2edefbdfcdc9539
--- /dev/null
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+namespace fc_lstm_test {
+
+void AddVarToScope(Scope* param_scope, const std::string& name,
+                   const DDim& dims) {
+  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
+  tensor->Resize(dims);
+  tensor->mutable_data<float>(platform::CPUPlace());
+}
+
+Scope* CreateParamScope() {
+  auto param_scope = new Scope();
+  AddVarToScope(param_scope, "lstm_fc_w", {});
+  AddVarToScope(param_scope, "lstm_fc_b", {});
+  AddVarToScope(param_scope, "lstm_w", {});
+  AddVarToScope(param_scope, "lstm_b", {});
+  AddVarToScope(param_scope, "lstm_cell_0", {});
+  AddVarToScope(param_scope, "lstm_batch_gate_0", {});
+  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_0", {});
+  AddVarToScope(param_scope, "lstm_hidden_0", {});
+  AddVarToScope(param_scope, "lstm_cell_1", {});
+  AddVarToScope(param_scope, "lstm_batch_gate_1", {});
+  AddVarToScope(param_scope, "lstm_batch_cell_pre_gate_1", {});
+  AddVarToScope(param_scope, "lstm_hidden_1", {});
+  return param_scope;
+}
+
+std::unique_ptr<ir::Graph> PrepareGraph(
+    std::string gate_activation = "sigmoid",
+    std::string cell_activation = "tanh",
+    std::string candidate_activation = "tanh") {
+  // inputs                      operator            output
+  // --------------------------------------------------------
+  // (a, lstm_fc_w)              mul              -> fc_0_tmp_0
+  // (fc_0_tmp_0, lstm_fc_b)     elementwise_add  -> fc_0_tmp_1
+  // fc_0_tmp_1,lstm_w,lstm_b    lstm             -> lstm_out_0
+
+  // (b, lstm_fc_w)              mul              -> fc_1_tmp_0
+  // (fc_1_tmp_0, lstm_fc_b)     elementwise_add  -> fc_1_tmp_1
+  // (fc_1_tmp_1,lstm_w,lstm_b)  lstm             -> lstm_out_1
+  Layers layers;
+  auto* a = layers.data("a");
+  auto* b = layers.data("b");
+  auto* fc_w = layers.data("lstm_fc_w", {}, true);
+  auto* fc_b = layers.data("lstm_fc_b", {}, true);
+  auto* lstm_w = layers.data("lstm_w", {}, true);
+  auto* lstm_b = layers.data("lstm_b", {}, true);
+  auto* fc_0_tmp0 = layers.mul(a, fc_w);
+  auto* fc_0_tmp1 = layers.elementwise_add(fc_0_tmp0, fc_b);
+  auto* lstm_cell_0 = layers.data("lstm_cell_0", {}, false);
+  auto* lstm_batch_gate_0 = layers.data("lstm_batch_gate_0", {}, false);
+  auto* lstm_batch_cell_pre_gate_0 =
+      layers.data("lstm_batch_cell_pre_gate_0", {}, false);
+  auto* lstm_hidden_0 = layers.data("lstm_hidden_0", {}, false);
+  layers.lstm(fc_0_tmp1, lstm_w, lstm_b, lstm_cell_0, lstm_batch_gate_0,
+              lstm_hidden_0, lstm_batch_cell_pre_gate_0, nullptr, nullptr, true,
+              false, gate_activation, cell_activation, candidate_activation);
+  auto* fc_1_tmp0 = layers.mul(b, fc_w);
+  auto* fc_1_tmp1 = layers.elementwise_add(fc_1_tmp0, fc_b);
+  auto* lstm_cell_1 = layers.data("lstm_cell_1", {}, false);
+  auto* lstm_batch_gate_1 = layers.data("lstm_batch_gate_1", {}, false);
+  auto* lstm_batch_cell_pre_gate_1 =
+      layers.data("lstm_batch_cell_pre_gate_1", {}, false);
+  auto* lstm_hidden_1 = layers.data("lstm_hidden_1", {}, false);
+  layers.lstm(fc_1_tmp1, lstm_w, lstm_b, lstm_cell_1, lstm_batch_gate_1,
+              lstm_hidden_1, lstm_batch_cell_pre_gate_1, nullptr, nullptr, true,
+              false, gate_activation, cell_activation, candidate_activation);
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
+  return std::move(graph);
+}
+
+}  // namespace fc_lstm_test
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index 064da3d941602ee0e4f868fb0dbda305102da32b..3476ce8610ee34e17dfb6fa04d643cf9449ee154 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2262,11 +2262,11 @@ PDNode *patterns::QuantizePlacement::operator()(
 PDNode *patterns::Bfloat16Placement::operator()(
     const std::unordered_set<std::string> &bfloat16_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"concat", "conv2d", "conv2d_transpose",
-                                       "elementwise_add", "elementwise_mul",
-                                       "fc", "fusion_gru", "gelu", "layer_norm",
-                                       "matmul", "pool2d", "relu", "reshape2",
-                                       "softmax", "sum", "transpose2"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "conv2d_transpose", "elementwise_add",
+           "elementwise_mul", "fc", "fusion_gru", "fusion_lstm", "gelu",
+           "layer_norm", "matmul", "pool2d", "relu", "reshape2", "softmax",
+           "sum", "transpose2"});
   if (!bfloat16_enabled_op_types.empty()) {
     supported_op_types = bfloat16_enabled_op_types;
   }
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c4770a322db50c495f9d47aba3d338615fa36219
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h"
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void TestFcRNNFusePass(const std::string& pass_name,
+                       std::string activation = "tanh",
+                       std::string gate_activation = "sigmoid",
+                       std::string candidate_activation = "tanh") {
+  std::unique_ptr<ir::Graph> graph =
+      (pass_name == "fc_gru_fuse_pass"
+           ? fc_gru_test::PrepareGraph(activation, gate_activation)
+           : fc_lstm_test::PrepareGraph(gate_activation, activation,
+                                        candidate_activation));
+  auto mkldnn_placement_pass_ =
+      PassRegistry::Instance().Get("mkldnn_placement_pass");
+  mkldnn_placement_pass_->Set("mkldnn_enabled_op_types",
+                              new std::unordered_set<std::string>({}));
+  graph->Set("__param_scope__", (pass_name == "fc_gru_fuse_pass"
+                                     ? fc_gru_test::CreateParamScope()
+                                     : fc_lstm_test::CreateParamScope()));
+  graph.reset(mkldnn_placement_pass_->Apply(graph.release()));
+
+  auto check_num_mkldnn_nodes = [&](const std::unique_ptr<ir::Graph>& graph) {
+    int nodes_cout = 0;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsOp()) {
+        auto* op = node->Op();
+        if (op->GetAttrIfExists<bool>("use_mkldnn")) nodes_cout++;
+      }
+    }
+    return nodes_cout;
+  };
+  int num_mkldnn_nodes_before = check_num_mkldnn_nodes(graph);
+  int removed_mkldnn_nodes = 2;
+
+  // OneDNN fusion_gru and fusion_lstm supports only sigmoid as a gate
+  // activation and tanh as an activation and candidate_activation
+  if (activation != "tanh" || gate_activation != "sigmoid" ||
+      candidate_activation != "tanh")
+    removed_mkldnn_nodes += 2;
+
+  auto fc_rnn_fuse_pass_ = PassRegistry::Instance().Get(pass_name);
+  graph.reset(fc_rnn_fuse_pass_->Apply(graph.release()));
+  int num_mkldnn_nodes_after = check_num_mkldnn_nodes(graph);
+
+  PADDLE_ENFORCE_EQ(num_mkldnn_nodes_before - removed_mkldnn_nodes,
+                    num_mkldnn_nodes_after,
+                    platform::errors::PreconditionNotMet(
+                        "The number of nodes with \"use_mkldnn\" attr after "
+                        "passes is not as expected"));
+}
+
+TEST(FcGruFusePass, use_mkldnn) { TestFcRNNFusePass("fc_gru_fuse_pass"); }
+
+TEST(FcGruFusePass, gru_unsupported_activations) {
+  TestFcRNNFusePass("fc_gru_fuse_pass", "relu", "sigmoid");
+}
+
+TEST(FcLstmFusePass, use_mkldnn) { TestFcRNNFusePass("fc_lstm_fuse_pass"); }
+
+TEST(FcLstmFusePass, lstm_unsupported_activations) {
+  TestFcRNNFusePass("fc_lstm_fuse_pass", "tanh", "relu", "tanh");
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_placement_pass);
+USE_PASS(fc_gru_fuse_pass);
+USE_PASS(fc_lstm_fuse_pass);
diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h
index 850d3dca6d0e10dd2f93a2149bef268042de339b..4b6068d4776e3e12a3b735c101dd181922954374 100644
--- a/paddle/fluid/framework/ir/pass_tester_helper.h
+++ b/paddle/fluid/framework/ir/pass_tester_helper.h
@@ -194,17 +194,20 @@ struct Layers {
   }
 
   VarDesc* mul(VarDesc* x, VarDesc* y, VarDesc* out = nullptr,
-               int x_num_col_dims = 1, int y_num_col_dims = 1) {
+               int x_num_col_dims = 1, int y_num_col_dims = 1,
+               bool use_mkldnn = false) {
     AttributeMap attrs;
     attrs["x_num_col_dims"] = x_num_col_dims;
     attrs["y_num_col_dims"] = y_num_col_dims;
+    attrs["use_mkldnn"] = use_mkldnn;
     return binary_op("mul", x, y, out, &attrs);
   }
 
   VarDesc* elementwise_add(VarDesc* x, VarDesc* y, VarDesc* out = nullptr,
-                           int axis = -1) {
+                           int axis = -1, bool use_mkldnn = false) {
     AttributeMap attrs;
     attrs["axis"] = axis;
+    attrs["use_mkldnn"] = use_mkldnn;
     return binary_op("elementwise_add", x, y, out, &attrs);
   }
 
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index 024313837e0b63a4ff2325b9cedd75a608c2a879..720c90090cf746121ee79b44bd3c9ab35b736dba 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -38,7 +38,6 @@ void SetAnalysisConfig(AnalysisConfig *cfg,
   cfg->SwitchSpecifyInputNames(false);
   cfg->SetCpuMathLibraryNumThreads(num_threads);
   cfg->EnableMKLDNN();
-  cfg->pass_builder()->AppendPass("mkldnn_placement_pass");
 }
 
 std::vector<size_t> ReadSentenceLod(std::ifstream &file, size_t offset,
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc
index 6cca6b5a9729a7065e64771ec6bfb2b1cbb52cf5..42bf784b2af4fbcb1cde36d995f1152f0e31635b 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -249,6 +249,11 @@ void FusionLSTMOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<std::string>(
+      "mkldnn_data_type",
+      "(string, default \"float32\"). Data type of mkldnn kernel")
+      .SetDefault("float32")
+      .InEnum({"float32", "int8", "bfloat16"});
   AddAttr<float>("Scale_data",
                  "Scale to be used for int8 input/output data."
                  "Only used with MKL-DNN INT8.")
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
index 7320efd259f459875b1ef37d89d6b316dc1efeac..fa9a93452dffdeb39d7cad7cebe2e6b18ef526dc 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_bf16_mkldnn_op.py
@@ -27,7 +27,7 @@ from paddle.fluid.tests.unittests.test_fusion_lstm_op import fc, ACTIVATION
                  "place does not support BF16 evaluation")
 class TestFusionGRUBF16MKLDNNOp(OpTest):
     def set_confs(self):
-        self.mkldnn_data_type = False
+        pass
 
     def test_check_output(self):
         for use_seq in {True, False}:
@@ -48,6 +48,7 @@ class TestFusionGRUBF16MKLDNNOp(OpTest):
         self.act_gate = 'sigmoid'
         self.origin_mode = False
         self.use_mkldnn = True
+        self.mkldnn_data_type = "bfloat16"
         self.force_fp32_output = False
         self.weights_dtype = 'fp32'
         self.set_confs()
@@ -113,7 +114,8 @@ class TestFusionGRUBF16MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'origin_mode': self.origin_mode,
             'force_fp32_output': self.force_fp32_output,
-            'use_mkldnn': self.use_mkldnn
+            'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
         }
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
index 2d3caf0be97c9548bfc278a093535aec113b6b6c..4fda51e9e05f48592f1f262b2ce01e6bc3d56eef 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py
@@ -35,6 +35,7 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
         self.act_gate = 'sigmoid'
         self.origin_mode = True
         self.use_mkldnn = True
+        self.mkldnn_data_type = "int8"
         self.force_fp32_output = True
         self.error_margin = 1e-5
         self.set_confs()
@@ -115,6 +116,7 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'origin_mode': self.origin_mode,
             'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
             'force_fp32_output': self.force_fp32_output,
             'Scale_data': scale_data,
             'Shift_data': shift_data,
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
index d65919aa434c387348963e0a0ef00712ca91d549..d07eda3259960c15bdac576fa7506bf74db7b5e0 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
@@ -27,7 +27,7 @@ from paddle.fluid.tests.unittests.test_fusion_gru_op import fusion_gru
                  "place does not support BF16 evaluation")
 class TestFusionLSTMBF16ONEDNNOp(OpTest):
     def set_confs(self):
-        self.mkldnn_data_type = False
+        pass
 
     def test_check_output(self):
         for use_seq in {True, False}:
@@ -48,6 +48,7 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
         self.use_mkldnn = True
+        self.mkldnn_data_type = "bfloat16"
         self.force_fp32_output = False
         self.weights_dtype = 'fp32'
         self.set_confs()
@@ -130,7 +131,8 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest):
             'cell_activation': self.act_cell,
             'candidate_activation': self.act_cand,
             'force_fp32_output': self.force_fp32_output,
-            'use_mkldnn': self.use_mkldnn
+            'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
         }
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
index 93dc45f2650f531fef87d6291c48527a93d33db8..12f8c01783d9c3df7ba645a7a1c1bef45ccd84a9 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_int8_mkldnn_op.py
@@ -34,6 +34,7 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest):
         self.act_cand = 'tanh'
         self.use_peepholes = False  # LSTM u8 doesn't support peepholes
         self.use_mkldnn = True
+        self.mkldnn_data_type = "int8"
         self.force_fp32_output = False
         self.error_margin = 1e-5
         self.set_confs()
@@ -117,6 +118,7 @@ class TestFusionLSTMINT8MKLDNNOp(OpTest):
             'is_reverse': self.is_reverse,
             'use_peepholes': self.use_peepholes,
             'use_mkldnn': self.use_mkldnn,
+            'mkldnn_data_type': self.mkldnn_data_type,
             'force_fp32_output': self.force_fp32_output,
             'Scale_data': scale_data,
             'Shift_data': shift_data,