Merge branch 'develop' of github.com:PaddlePaddle/Paddle into reset_vars_on_pserver

5558784c · Yancey1989 · 32b94a7d · 5023530a · 5558784c · 5558784c
8 changed file
--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -29,39 +29,27 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
  std::unordered_set<Node*> nodes2delete;
  GraphPatternDetector gpd;
-  // BuildFCPattern(gpd.mutable_pattern());
  auto* x = gpd.mutable_pattern()
                ->NewNode("fc_fuse/x")
                ->AsInput()
                ->assert_is_op_input("mul", "X");
-  patterns::FC(gpd.mutable_pattern(), "fc_fuse", x, true /*with bias*/);
+  patterns::FC fc_pattern(gpd.mutable_pattern(), "fc_fuse");
+  fc_pattern(x, true /*with bias*/);
-#define GET_NODE(id)                                                         \
-  PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode("fc_fuse/" #id)), \
-                 "pattern has no Node called %s", #id);                      \
-  auto* id = subgraph.at(gpd.pattern().RetrieveNode("fc_fuse/" #id));        \
-  PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", "fc_fuse/" #id);
  int found_fc_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle FC fuse";
-    // Currently, there is no FC op available, so I will just simulate the
+    GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
-    // scenerio.
+    GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
-    // FC's fusion is simple, just op fuse, no need to process the
+    GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
-    // parameters.
+    GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
-    GET_NODE(x);                // x
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
-    GET_NODE(w);                // Y
+    GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
-    GET_NODE(fc_bias);          // bias
-    GET_NODE(fc_out);           // Out
-    GET_NODE(mul);              // MUL op
-    GET_NODE(elementwise_add);  // ELEMENT_ADD op
-    GET_NODE(mul_out);          // tmp
-#undef GET_NODE
    // Create an FC Node.
    OpDesc desc;
-    std::string fc_x_in = x->Name();
+    std::string fc_x_in = subgraph.at(x)->Name();
    std::string fc_Y_in = w->Name();
    std::string fc_bias_in = fc_bias->Name();
    std::string fc_out_out = fc_out->Name();
@@ -73,7 +61,8 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
    auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
    GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out});
-    IR_NODE_LINK_TO(x, fc_node);
+    PADDLE_ENFORCE(subgraph.count(x));
+    IR_NODE_LINK_TO(subgraph.at(x), fc_node);
    IR_NODE_LINK_TO(w, fc_node);
    IR_NODE_LINK_TO(fc_bias, fc_node);
    IR_NODE_LINK_TO(fc_node, fc_out);

--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
@@ -20,52 +20,43 @@ namespace paddle {
 namespace framework {
 namespace ir {
-static void BuildPattern(PDPattern* pattern, const std::string& name_scope,
-                         bool with_fc_bias) {
-  PDNode* x = pattern->NewNode(name_scope, "x")
-                  ->assert_is_op_input("mul")
-                  ->assert_var_not_persistable();
-  auto* fc_out = patterns::FC(pattern, name_scope, x, with_fc_bias);
-  fc_out->AsIntermediate();  // fc_out is a tmp var, will be removed after fuse.
-  patterns::GRU(pattern, name_scope, fc_out);
-  VLOG(3) << "fc_gru pattern \n" << pattern->DotString();
-}
 static int BuildFusion(Graph* graph, const std::string& name_scope,
                       Scope* scope, bool with_fc_bias) {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();
-  BuildPattern(pattern, name_scope, with_fc_bias);
+  // Create pattern.
+  patterns::FC fc_pattern(pattern, name_scope);
+  patterns::GRU gru_pattern(pattern, name_scope);
+  PDNode* x =
+      pattern->NewNode(patterns::UniqueKey("x"))->assert_var_not_persistable();
+  auto* fc_out = fc_pattern(x, with_fc_bias);
+  fc_out->AsIntermediate();  // fc_out is a tmp var, will be removed after fuse.
+  gru_pattern(fc_out);
  // Create New OpDesc
-  auto gru_creater = [&](int gru, int x, int weight_x, int weight_h, int bias,
+  auto gru_creater = [&](Node* gru, Node* x, Node* weight_x, Node* weight_h,
-                         int hidden, int fc_bias) {
+                         Node* bias, Node* hidden, Node* fc_bias) {
-#define GET_NODE(x) auto* x##_n = graph->RetriveNode(x);
-    GET_NODE(x);
-    GET_NODE(weight_x);
-    GET_NODE(weight_h);
-    GET_NODE(bias);
-    GET_NODE(hidden);
-    GET_NODE(gru);
    OpDesc op_desc;
    op_desc.SetType("fusion_gru");
 #define NEW_NAME(x) name_scope + "/at." #x ".new"
-#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__##_n->Name()});
+#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
    SET_IN(X, x);
    SET_IN(WeightX, weight_x);
    SET_IN(WeightH, weight_h);
    if (with_fc_bias) {
-      op_desc.SetInput("Bias", {NEW_NAME(bias) + bias_n->Name()});
+      op_desc.SetInput("Bias", {NEW_NAME(bias) + bias->Name()});
    } else {
      SET_IN(Bias, bias);
    }
 #undef SET_IN
    op_desc.SetInput("H0", {});
-    op_desc.SetOutput("Hidden", {hidden_n->Name()});
+    op_desc.SetOutput("Hidden", {hidden->Name()});
-    op_desc.SetAttr("is_reverse", gru_n->Op()->GetAttr("is_reverse"));
+    op_desc.SetAttr("is_reverse", gru->Op()->GetAttr("is_reverse"));
    // TODO(TJ): This should be a option for infer
    op_desc.SetAttr("use_seq", true);
@@ -82,14 +73,12 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
    PADDLE_ENFORCE(scope);
    if (with_fc_bias) {
      // Fusion GRU bias = fcbias + grubias
-      auto* fusion_bias_var = scope->Var(NEW_NAME(bias) + bias_n->Name());
+      auto* fusion_bias_var = scope->Var(NEW_NAME(bias) + bias->Name());
      auto* out_bias_tensor =
          fusion_bias_var->GetMutable<framework::LoDTensor>();
      PADDLE_ENFORCE(fusion_bias_var);
-      GET_NODE(fc_bias);
+      auto* gru_bias_var = scope->FindVar(bias->Name());
-      PADDLE_ENFORCE(fc_bias_n);
+      auto* fc_bias_var = scope->FindVar(fc_bias->Name());
-      auto* gru_bias_var = scope->FindVar(bias_n->Name());
-      auto* fc_bias_var = scope->FindVar(fc_bias_n->Name());
      PADDLE_ENFORCE(gru_bias_var);
      PADDLE_ENFORCE(fc_bias_var);
      const auto& gru_bias_tenosr = gru_bias_var->Get<framework::LoDTensor>();
@@ -113,11 +102,11 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
 #undef NEW_NAME
 #undef NEW_IMTERMEDIATE_OUT
-    IR_NODE_LINK_TO(x_n, op);
+    IR_NODE_LINK_TO(x, op);
-    IR_NODE_LINK_TO(weight_x_n, op);
+    IR_NODE_LINK_TO(weight_x, op);
-    IR_NODE_LINK_TO(weight_h_n, op);
+    IR_NODE_LINK_TO(weight_h, op);
-    IR_NODE_LINK_TO(bias_n, op);  // actually should link to new bias if have
+    IR_NODE_LINK_TO(bias, op);  // actually should link to new bias if have
-    IR_NODE_LINK_TO(op, hidden_n);
+    IR_NODE_LINK_TO(op, hidden);
    // h0?
    return op;
  };
@@ -125,42 +114,35 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
  int fusion_count{0};
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
-#define GET_NODE(name__)                                \
+    // Map every PDNode of the FC and GRU patterns to its matched ir::Node,
+    // build the fused op, then drop the nodes the fused op replaces.
+    auto* x_n = subgraph.at(x);
-  std::string name__##key = name_scope + "/" + #name__; \
+    GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
-  auto* name__##n = pattern->RetrieveNode(name__##key); \
+    GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
-  PADDLE_ENFORCE(name__##n);                            \
-  PADDLE_ENFORCE(subgraph.count(name__##n));            \
+    GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, gru_pattern);
-  Node* name__##_n = subgraph.at(name__##n);            \
+    GET_IR_NODE_FROM_SUBGRAPH(gru, gru, gru_pattern);
-  int name__ __attribute__((unused)) = name__##_n->id();
+    GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, gru_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, gru_pattern);
-    GET_NODE(x);
-    GET_NODE(w);  // fc weight
-    GET_NODE(mul);
-    GET_NODE(fc_out);
-    GET_NODE(Weight);
-    GET_NODE(gru);
-    GET_NODE(Bias);
-    GET_NODE(Hidden);
    // nodes need be removed
-    GET_NODE(BatchGate);
+    // Each intermediate output is looked up under its own pattern node, so
+    // that all three distinct graph nodes end up in marked_nodes below.
+    GET_IR_NODE_FROM_SUBGRAPH(BatchGate, BatchGate, gru_pattern);
-    GET_NODE(BatchResetHiddenPrev);
+    GET_IR_NODE_FROM_SUBGRAPH(BatchResetHiddenPrev, BatchResetHiddenPrev,
+                              gru_pattern);
-    GET_NODE(BatchHidden);
+    GET_IR_NODE_FROM_SUBGRAPH(BatchHidden, BatchHidden, gru_pattern);
    if (with_fc_bias) {
-      GET_NODE(mul_out);
+      // The FC pattern only creates its Out node when built with bias, so it
+      // is fetched here rather than unconditionally above.
+      GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
+      GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
-      GET_NODE(fc_bias);
+      GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
-      GET_NODE(elementwise_add);
+      GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
-      gru_creater(gru, x, w, Weight, Bias, Hidden, fc_bias);
+      gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias);
      // Remove unneeded nodes.
      std::unordered_set<const Node*> marked_nodes(
-          {mul_n, gru_n, elementwise_add_n, fc_bias_n, fc_out_n, mul_out_n,
+          {mul, gru, elementwise_add, fc_bias, fc_out, mul_out, BatchGate,
-           BatchGate_n, BatchResetHiddenPrev_n, BatchHidden_n});
+           BatchResetHiddenPrev, BatchHidden});
      GraphSafeRemoveNodes(graph, marked_nodes);
    } else {
-      gru_creater(gru, x, w, Weight, Bias, Hidden, -1);
+      gru_creater(gru, x_n, w, Weight, Bias, Hidden, nullptr);
      // Remove unneeded nodes.
      std::unordered_set<const Node*> marked_nodes(
-          {mul_n, gru_n, BatchGate_n, BatchResetHiddenPrev_n, BatchHidden_n});
+          {mul, gru, BatchGate, BatchResetHiddenPrev, BatchHidden});
      GraphSafeRemoveNodes(graph, marked_nodes);
    }
 #undef GET_NODE

--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
@@ -20,45 +20,29 @@ namespace paddle {
 namespace framework {
 namespace ir {
-static std::string GenNodeName(const std::string& prefix,
+int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
-                               const std::string& name) {
+                bool with_fc_bias) {
-  return prefix + "/" + name;
+  GraphPatternDetector gpd;
-}
+  auto* pattern = gpd.mutable_pattern();
-static void BuildPattern(PDPattern* pattern, const std::string& name_scope,
+  // Build pattern
-                         bool with_fc_bias) {
+  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
-  PDNode* x = pattern->NewNode(name_scope, "x")
                  ->assert_is_op_input("mul")
                  ->assert_var_not_persistable();
-  auto* fc_out = patterns::FC(pattern, name_scope, x, with_fc_bias);
+  patterns::FC fc_pattern(pattern, name_scope);
-  fc_out->AsIntermediate();  // fc_out is a tmp var, will be removed after fuse.
-  patterns::LSTM(pattern, name_scope, fc_out);
-  // LOG(INFO) << "\n" << pattern->DotString();
-}
-static int BuildFusion(Graph* graph, const std::string& name_scope,
-                       Scope* scope, bool with_fc_bias) {
-  GraphPatternDetector gpd;
-  auto* pattern = gpd.mutable_pattern();
-  BuildPattern(pattern, name_scope, with_fc_bias);
+  // fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
+  auto* fc_out = fc_pattern(x, with_fc_bias)->AsIntermediate();
+  patterns::LSTM lstm_pattern(pattern, name_scope);
+  lstm_pattern(fc_out);
  // Create New OpDesc
-  auto lstm_creator = [&](int lstm, int input, int weight_x, int weight_h,
+  auto lstm_creator = [&](Node* lstm, Node* input, Node* weight_x,
-                          int bias, int hidden, int cell, int xx, int fc_bias) {
+                          Node* weight_h, Node* bias, Node* hidden, Node* cell,
-#define GET_NODE(x) auto* x##_n = graph->RetriveNode(x);
+                          Node* xx, Node* fc_bias) {
-    GET_NODE(input);
-    GET_NODE(weight_x);
-    GET_NODE(weight_h);
-    GET_NODE(bias);
-    GET_NODE(hidden);
-    GET_NODE(cell);
-    GET_NODE(xx);
-    GET_NODE(lstm);
    OpDesc op_desc;
    op_desc.SetType("fusion_lstm");
-#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__##_n->Name()});
+#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
    SET_IN(X, input);
    SET_IN(WeightX, weight_x);
    SET_IN(WeightH, weight_h);
@@ -71,13 +55,12 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
      auto* bias_var = scope->Var(new_bias_var);
      PADDLE_ENFORCE(bias_var);
      auto* bias_tensor = bias_var->GetMutable<framework::LoDTensor>();
-      auto* lstm_bias_var = scope->FindVar(bias_n->Name());
+      auto* lstm_bias_var = scope->FindVar(bias->Name());
      PADDLE_ENFORCE(lstm_bias_var);
      const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();
      bias_tensor->Resize(lstm_bias_tensor.dims());
-      GET_NODE(fc_bias);
+      auto* fc_bias_var = scope->FindVar(fc_bias->Name());
-      auto* fc_bias_var = scope->FindVar(fc_bias_n->Name());
      const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
      auto* data = bias_tensor->mutable_data<float>(platform::CPUPlace());
@@ -88,31 +71,36 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
      }
      op_desc.SetInput("Bias", {new_bias_var});
    }
-#undef GET_NODE
    // Create temp variables.
-    scope->Var(name_scope + "/BatchedInput.new")
+    const std::string BatchedInput = patterns::UniqueKey("BatchedInput");
-        ->GetMutable<framework::LoDTensor>();
+    const std::string BatchedCellPreAct =
-    scope->Var(name_scope + "/BatchCellPreAct.new")
+        patterns::UniqueKey("BatchedCellPreAct");
-        ->GetMutable<framework::LoDTensor>();
+    const std::string BatchedGate = patterns::UniqueKey("BatchedGate");
-    scope->Var(name_scope + "/BatchedGate.new")
-        ->GetMutable<framework::LoDTensor>();
+    scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
+    scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
+    scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();
    op_desc.SetInput("H0", {});
    op_desc.SetInput("C0", {});
-    op_desc.SetOutput("Hidden", {hidden_n->Name()});
+    op_desc.SetOutput("Hidden", {hidden->Name()});
-    op_desc.SetOutput("Cell", {cell_n->Name()});
+    op_desc.SetOutput("Cell", {cell->Name()});
-    op_desc.SetOutput("XX", {xx_n->Name()});
+    op_desc.SetOutput("XX", {xx->Name()});
-    op_desc.SetOutput("BatchedGate", {name_scope + "/BatchedGate.new"});
+    op_desc.SetOutput("BatchedGate", {BatchedGate});
-    op_desc.SetOutput("BatchCellPreAct", {name_scope + "/BatchCellPreAct.new"});
+    op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
-    op_desc.SetOutput("BatchedInput", {name_scope + "/BatchedInput.new"});
+    op_desc.SetOutput("BatchedInput", {BatchedInput});
-    op_desc.SetAttr("is_reverse", lstm_n->Op()->GetAttr("is_reverse"));
+    op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
-    op_desc.SetAttr("use_peepholes", lstm_n->Op()->GetAttr("use_peepholes"));
+    op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
    // TODO(TJ): get from attr
    op_desc.SetAttr("use_seq", true);
-#define TMP_NAME(x) "at.new.tmp." #x
+    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
-#define OP_SET_OUT(x) op_desc.SetOutput(#x, {TMP_NAME(x)})
+    auto* scope = graph->Get<Scope*>(kParamScopeAttr);
+#define OP_SET_OUT(x)                            \
+  const std::string x = patterns::UniqueKey(#x); \
+  op_desc.SetOutput(#x, {x});                    \
+  scope->Var(x)->GetMutable<LoDTensor>()
    OP_SET_OUT(BatchedCell);
    OP_SET_OUT(BatchedHidden);
    OP_SET_OUT(ReorderedH0);
@@ -120,22 +108,11 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
 #undef OP_SET_OUT
    auto* op = graph->CreateOpNode(&op_desc);
-    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
+    IR_NODE_LINK_TO(input, op);
-    auto* scope = graph->Get<Scope*>(kParamScopeAttr);
+    IR_NODE_LINK_TO(weight_x, op);
+    IR_NODE_LINK_TO(weight_h, op);
-#define TMP_NEW(x) scope->Var(TMP_NAME(x))->GetMutable<LoDTensor>()
+    IR_NODE_LINK_TO(bias, op);
-    TMP_NEW(BatchedCell);
+    IR_NODE_LINK_TO(op, hidden);
-    TMP_NEW(BatchedHidden);
-    TMP_NEW(ReorderedH0);
-    TMP_NEW(ReorderedC0);
-#undef TMP_NEW
-#undef TMP_NAME
-    IR_NODE_LINK_TO(input_n, op);
-    IR_NODE_LINK_TO(weight_x_n, op);
-    IR_NODE_LINK_TO(weight_h_n, op);
-    IR_NODE_LINK_TO(bias_n, op);
-    IR_NODE_LINK_TO(op, hidden_n);
    return op;
  };
@@ -143,39 +120,32 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
-#define GET_NODE(name__)                                \
-  std::string name__##key = name_scope + "/" + #name__; \
-  auto* name__##n = pattern->RetrieveNode(name__##key); \
-  PADDLE_ENFORCE(name__##n);                            \
-  PADDLE_ENFORCE(subgraph.count(name__##n));            \
-  Node* name__##_n = subgraph.at(name__##n);            \
-  int name__ __attribute__((unused)) = name__##_n->id();
-    GET_NODE(x);
-    GET_NODE(w);
-    GET_NODE(mul);
-    GET_NODE(fc_out);
-    GET_NODE(Weight);
-    GET_NODE(lstm);
-    GET_NODE(Bias);
-    GET_NODE(Hidden);
-    GET_NODE(Cell);
+    GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
    if (with_fc_bias) {
-      GET_NODE(fc_bias);
+      GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
-      GET_NODE(elementwise_add);
+      GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
-      lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, fc_bias);
+      GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
+      lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
+                   fc_bias);
      // Remove unneeded nodes.
      std::unordered_set<const Node*> marked_nodes(
-          {mul_n, lstm_n, elementwise_add_n});
+          {mul, lstm, elementwise_add});
      GraphSafeRemoveNodes(graph, marked_nodes);
    } else {
-      lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, -1);
+      GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
+      lstm_creator(lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out,
+                   nullptr);
      // Remove unneeded nodes.
-      std::unordered_set<const Node*> marked_nodes({mul_n, lstm_n});
+      std::unordered_set<const Node*> marked_nodes({mul, lstm});
      GraphSafeRemoveNodes(graph, marked_nodes);
    }
-#undef GET_NODE
    ++fusion_count;
  };

--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -21,6 +21,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/string/printf.h"
 namespace paddle {
 namespace framework {
@@ -106,8 +107,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
  for (auto& pdnode : pattern_.nodes()) {
    if (!pdnodes2nodes_.count(pdnode.get())) {
      VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
+      // return false;
-      return false;
    }
  }
  for (auto& item : pdnodes2nodes_) {
@@ -517,87 +517,89 @@ bool VarLinksFromOp(Node* node, const std::string& op_type) {
  return false;
 }
-PDNode* patterns::FC(PDPattern* pattern, const std::string& name_scope,
+PDNode* patterns::FC::operator()(paddle::framework::ir::PDNode* x,
-                     PDNode* x, bool with_bias) {
+                                 bool with_bias) {
-  // mul op
+  // Create shared nodes.
-  auto* mul_op = pattern->NewNode(name_scope, "mul")->assert_is_op("mul");
+  x->assert_is_op_input("mul", "X");
-  auto* mul_weight_var = pattern->NewNode(name_scope, "w")
+  auto* mul = pattern->NewNode(mul_repr())->assert_is_op("mul");
-                             ->AsInput()
-                             ->assert_is_persistable_var()
+  auto* mul_w_var = pattern->NewNode(w_repr())
-                             ->assert_is_op_input("mul", "Y");
+                        ->AsInput()
+                        ->assert_is_persistable_var()
-  PDNode* fc_out{nullptr};
+                        ->assert_is_op_input("mul", "Y");
-  if (with_bias) {
-    PDNode* elementwise_add_op{nullptr};
+  auto* mul_out_var =
-    PDNode *mul_out_var{nullptr}, *bias{nullptr};
+      pattern->NewNode(mul_out_repr())->assert_is_op_output("mul");
-    elementwise_add_op = pattern->NewNode(name_scope, "elementwise_add")
-                             ->assert_is_op("elementwise_add");
+  if (!with_bias) {  // not with bias
-    // intermediate variable, will be removed in the IR after fuse.
+    // Add links.
-    mul_out_var = pattern->NewNode(name_scope, "mul_out")
+    mul->LinksFrom({x, mul_w_var}).LinksTo({mul_out_var});
-                      ->AsIntermediate()
+    return mul_out_var;
-                      ->assert_is_only_output_of_op("mul")
-                      ->assert_is_op_input("elementwise_add");
+  } else {  // with bias
-    // bias
+    mul_out_var->AsIntermediate()->assert_is_op_input("elementwise_add");
-    bias = pattern->NewNode(name_scope, "fc_bias")
+    // Create operators.
-               ->AsInput()
+    auto* elementwise_add = pattern->NewNode(elementwise_add_repr())
-               ->assert_is_op_input("elementwise_add");
+                                ->assert_is_op("elementwise_add");
-    // output
+    // Create variables.
-    fc_out = pattern->NewNode(name_scope, "fc_out")
+    auto* bias = pattern->NewNode(bias_repr())
-                 ->AsOutput()
+                     ->assert_is_op_input("elementwise_add")
-                 ->assert_is_op_output("elementwise_add");
+                     ->AsInput();
-    mul_op->LinksFrom({x, mul_weight_var}).LinksTo({mul_out_var});
-    elementwise_add_op->LinksFrom({mul_out_var, bias}).LinksTo({fc_out});
+    auto* fc_out = pattern->NewNode(Out_repr())
-  } else {
+                       ->AsOutput()
-    fc_out = pattern->NewNode(name_scope, "fc_out")
+                       ->assert_is_op_output("elementwise_add");
-                 ->AsOutput()
-                 ->assert_is_op_output("mul");
+    mul->LinksFrom({mul_w_var, x}).LinksTo({mul_out_var});
-    mul_op->LinksFrom({mul_weight_var, x}).LinksTo({fc_out});
+    elementwise_add->LinksFrom({mul_out_var, bias}).LinksTo({fc_out});
+    return fc_out;
  }
-  return fc_out;
 }
-#define NEW_NODE(op__, arg__, io__)                  \
+PDNode* patterns::LSTM::operator()(PDNode* x) {
-  auto* arg__ = pattern->NewNode(name_scope, #arg__) \
-                    ->assert_is_op_##io__(#op__, #arg__);
-PDNode* patterns::LSTM(PDPattern* pattern, const std::string& name_scope,
-                       PDNode* x) {
  x->assert_is_op_input("lstm", "Input");
-  auto* lstm_op = pattern->NewNode(name_scope, "lstm")->assert_is_op("lstm");
+  auto* lstm_op = pattern->NewNode(lstm_repr())->assert_is_op("lstm");
+#define NEW_NODE(arg__, io__) \
+  auto* arg__ =               \
+      pattern->NewNode(arg__##_repr())->assert_is_op_##io__("lstm", #arg__);
  // Currently, the H0 and C0 are optional
  // TODO(Superjomn) upgrade the fuse framework to support optional.
  // NEW_NODE(H0, input);
  // NEW_NODE(C0, input);
-  NEW_NODE(lstm, Weight, input);
+  NEW_NODE(Weight, input);
-  NEW_NODE(lstm, Bias, input);
+  NEW_NODE(Bias, input);
-  NEW_NODE(lstm, Hidden, output);
+  NEW_NODE(Hidden, output);
-  NEW_NODE(lstm, Cell, output);
+  NEW_NODE(Cell, output);
-  NEW_NODE(lstm, BatchGate, output);
+  NEW_NODE(BatchGate, output);
-  NEW_NODE(lstm, BatchCellPreAct, output);
+  NEW_NODE(BatchCellPreAct, output);
+#undef NEW_NODE
  lstm_op->LinksFrom({x, Weight, Bias});
  lstm_op->LinksTo({Hidden, Cell, BatchGate, BatchCellPreAct});
  return Hidden;
 }
-PDNode* patterns::GRU(PDPattern* pattern, const std::string& name_scope,
+PDNode* patterns::GRU::operator()(PDNode* x) {
-                      PDNode* x) {
  x->assert_is_op_input("gru", "Input");
-  auto* gru_op = pattern->NewNode(name_scope, "gru")->assert_is_op("gru");
+  auto* gru_op = pattern->NewNode(gru_repr())->assert_is_op("gru");
+#define NEW_NODE(arg__, io__) \
+  auto* arg__ =               \
+      pattern->NewNode(arg__##_repr())->assert_is_op_##io__("gru", #arg__);
-  NEW_NODE(gru, Weight, input);
+  NEW_NODE(Weight, input);
  // TODO(Superjomn): upgrade the fuse framework to support optional.
  // H0 and bias are optional
-  NEW_NODE(gru, Bias, input);  // also optional
+  NEW_NODE(Bias, input);  // also optional
  // NEW_NODE(H0, input);
-  NEW_NODE(gru, Hidden, output);
+  NEW_NODE(Hidden, output);
  // below are intermediate
-  NEW_NODE(gru, BatchGate, output);
+  NEW_NODE(BatchGate, output);
-  NEW_NODE(gru, BatchResetHiddenPrev, output);
+  NEW_NODE(BatchResetHiddenPrev, output);
-  NEW_NODE(gru, BatchHidden, output);
+  NEW_NODE(BatchHidden, output);
+#undef NEW_NODE
  BatchGate->AsIntermediate();
  BatchResetHiddenPrev->AsIntermediate();
@@ -607,7 +609,6 @@ PDNode* patterns::GRU(PDPattern* pattern, const std::string& name_scope,
  gru_op->LinksTo({Hidden, BatchGate, BatchResetHiddenPrev, BatchHidden});
  return Hidden;
 }
-#undef NEW_NODE
 }  // namespace ir
 }  // namespace framework

--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -286,22 +286,148 @@ void GraphSafeRemoveNodes(Graph* graph,
                          const std::unordered_set<const Node*>& nodes);
 // Some pre-defined patterns those can be reused in multiple passes.
+// Each related Fluid Layer or Op should be one pattern here for better reuse
+// across different fusion passes.
 namespace patterns {
+struct KeyCounter {
+  static KeyCounter& Instance() {
+    static KeyCounter x;
+    return x;
+  }
+  int IncCounter(const std::string& key) { return dic_[key]++; }
+ private:
+  std::unordered_map<std::string, size_t> dic_;
+};
+// Generate a unique PDNode's name with name_scope and id.
+// The format is {name_scope}/{repr}/{id}/{name}
+static std::string PDNodeName(const std::string& name_scope,
+                              const std::string& repr, size_t id,
+                              const std::string& name) {
+  return string::Sprintf("%s/%s/%d/%s", name_scope, repr, id, name);
+}
+// Generate a unique PDNode's name.
+// The format is {name_scope}/{repr}/{id}
+static std::string PDNodeName(const std::string& name_scope,
+                              const std::string& repr) {
+  return string::Sprintf("%s/%s/%d", name_scope, repr,
+                         KeyCounter::Instance().IncCounter(repr));
+}
+// Generate a unique key. It can be used for a universally unique temporary
+// name.
+// The format is {repr}/{id}
+static std::string UniqueKey(const std::string& repr) {
+  return string::Sprintf("%s/%d", repr,
+                         KeyCounter::Instance().IncCounter(repr));
+}
+// Declare a PDNode in a pattern, will create two methods:
+// std::string xxx_repr(); return this PDNode's string id.
+// PDNode* xxx_n(); return the corresponding PDNode.
+#define PATTERN_DECL_NODE(name__)                        \
+  std::string name__##_repr() const {                    \
+    return PDNodeName(name_scope_, repr_, id_, #name__); \
+  }                                                      \
+  PDNode* name__##_n() const { return pattern->RetrieveNode(name__##_repr()); }
+// Get an ir::Node* from the matched subgraph.
+// var: variable.
+// arg: the argument declared by PATTERN_DECL_NODE in a pattern definition.
+// pat: the pattern object.
+#define GET_IR_NODE_FROM_SUBGRAPH(var, arg, pat)                    \
+  PADDLE_ENFORCE(subgraph.count(pat.arg##_n()),                     \
+                 "Node not found for PDNode %s", pat.arg##_repr()); \
+  Node* var = subgraph.at(pat.arg##_n());                           \
+  PADDLE_ENFORCE(var, "node %s not exists in the sub-graph", #arg)
+// The base class of all the patterns.
+struct PatternBase {
+  PatternBase(PDPattern* pattern, const std::string& name_scope,
+              const std::string& repr)
+      : pattern(pattern),
+        name_scope_(name_scope),
+        repr_(repr),
+        id_(KeyCounter::Instance().IncCounter(repr)) {}
+  PDPattern* pattern;
+ protected:
+  std::string name_scope_;
+  std::string repr_;
+  size_t id_;
+};
 // FC with bias
 // op: mul + elementwise_add
 // named nodes:
 // mul, elementwise_add
 // w, mul_out, bias, fc_out
-PDNode* FC(PDPattern* pattern, const std::string& name_scope, PDNode* x,
+struct FC : public PatternBase {
-           bool with_bias);
+  FC(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "fc") {}
+  PDNode* operator()(PDNode* x, bool with_bias);
+  // declare operator node's name
+  PATTERN_DECL_NODE(fc);
+  PATTERN_DECL_NODE(mul);
+  PATTERN_DECL_NODE(elementwise_add);
+  // declare variable node's name
+  PATTERN_DECL_NODE(w);
+  PATTERN_DECL_NODE(mul_out);  // (x,w) -> mul_out
+  PATTERN_DECL_NODE(bias);
+  PATTERN_DECL_NODE(Out);
+};
+struct LSTM : public PatternBase {
+  LSTM(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "lstm") {}
-PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
+  PDNode* operator()(PDNode* x);
-PDNode* GRU(PDPattern* pattern, const std::string& name_scope, PDNode* x);
+  // Operators
+  PATTERN_DECL_NODE(lstm);
+  // Inputs
+  PATTERN_DECL_NODE(Input);
+  PATTERN_DECL_NODE(H0);
+  PATTERN_DECL_NODE(C0);
+  PATTERN_DECL_NODE(Weight);
+  PATTERN_DECL_NODE(Bias);
+  // Outputs
+  PATTERN_DECL_NODE(Hidden);
+  PATTERN_DECL_NODE(Cell);
+  PATTERN_DECL_NODE(BatchGate);
+  PATTERN_DECL_NODE(BatchCellPreAct);
+};
+// GRU pattern: a single `gru` op together with its Weight/Bias inputs and
+// its Hidden/Batch* outputs. operator() attaches the pattern to the given
+// input PDNode `x` and returns the Hidden output PDNode.
+struct GRU : public PatternBase {
+  // The repr is "gru" so that generated node names read
+  // {name_scope}/gru/{id}/{name} and GRU keeps its own id counter instead of
+  // sharing the "lstm" one.
+  GRU(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "gru") {}
+  PDNode* operator()(PDNode* x);
+  // Operators
+  PATTERN_DECL_NODE(gru);
+  // Inputs
+  PATTERN_DECL_NODE(Bias);
+  PATTERN_DECL_NODE(Weight);
+  // Outputs
+  PATTERN_DECL_NODE(BatchGate);
+  PATTERN_DECL_NODE(BatchResetHiddenPrev);
+  PATTERN_DECL_NODE(BatchHidden);
+  PATTERN_DECL_NODE(Hidden);
+};
 }  // namespace patterns
+// Link two ir::Nodes: append b to a's outputs and a to b's inputs.
 #define IR_NODE_LINK_TO(a, b) \
  a->outputs.push_back(b);    \
  b->inputs.push_back(a);

--- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
@@ -192,6 +192,8 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
  auto* id = subgraph.at(pattern.RetrieveNode(#id));        \
  PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
+  int fuse_count{0};
  detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
                            Graph* graph) {
    VLOG(4) << "get one concat pattern";
@@ -239,8 +241,12 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
    marked_nodes.erase(sequence_expand1_in);
    marked_nodes.erase(fc_out);
    GraphSafeRemoveNodes(graph, marked_nodes);
+    ++fuse_count;
  });
+  AddStatis(fuse_count);
  return graph;
 }

--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -48,18 +48,18 @@ function (inference_download_and_uncompress install_dir url gz_filename)
    message(STATUS "finish downloading ${gz_filename}")
 endfunction(inference_download_and_uncompress)
-set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz")
+set(RNN1_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/rnn1%2Fmodel.tar.gz")
-set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz")
+set(RNN1_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/rnn1%2Fdata.txt.tar.gz")
-set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/ditu_rnn" CACHE PATH "Ditu RNN model and data root." FORCE)
+set(RNN1_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/rnn1" CACHE PATH "RNN1 model and data root." FORCE)
-if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
+if (NOT EXISTS ${RNN1_INSTALL_DIR} AND WITH_TESTING)
-  inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz")
+  inference_download_and_uncompress(${RNN1_INSTALL_DIR} ${RNN1_MODEL_URL} "rnn1%2Fmodel.tar.gz")
-  inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz")
+  inference_download_and_uncompress(${RNN1_INSTALL_DIR} ${RNN1_DATA_URL} "rnn1%2Fdata.txt.tar.gz")
 endif()
 inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
    EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
-    ARGS --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
+    ARGS --infer_model=${RNN1_INSTALL_DIR}/model
-         --infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
+         --infer_data=${RNN1_INSTALL_DIR}/data.txt)
 inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
 inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)

--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -26,8 +26,8 @@
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/singleton.h"
-DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
+DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
+DEFINE_string(infer_data, "", "data path");
 DEFINE_int32(batch_size, 10, "batch size.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
 DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
@@ -223,17 +223,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }  // namespace
-const float ditu_rnn_target_data[] = {
-    104.711, 11.2431, 1.35422, 0,       0,       0,       0,       0,
-    27.7039, 1.41486, 7.09526, 0,       0,       0,       0,       0,
-    7.6481,  6.5324,  56.383,  2.88018, 8.92918, 132.007, 4.27429, 2.02934,
-    14.1727, 10.7461, 25.0616, 16.0197, 14.4163, 16.9199, 6.75517, 0,
-    80.0249, 4.77739, 0,       0,       0,       0,       0,       0,
-    47.5643, 2.67029, 8.76252, 0,       0,       0,       0,       0,
-    51.8822, 4.4411,  0,       0,       0,       0,       0,       0,
-    10.7286, 12.0595, 10.6672, 0,       0,       0,       0,       0,
-    93.5771, 3.84641, 0,       0,       0,       0,       0,       0,
-    169.426, 0,       0,       0,       0,       0,       0,       0};
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                   const std::vector<PaddleTensor> &base_outputs) {
  PADDLE_ENFORCE_GT(outputs.size(), 0);
@@ -255,11 +244,10 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
  }
 }
 // Test with a really complicate model.
-void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
-                           int num_threads) {
  AnalysisConfig config;
-  config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
+  config.prog_file = FLAGS_infer_model + "/__model__";
-  config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
+  config.param_file = FLAGS_infer_model + "/param";
  config.use_gpu = false;
  config.device = 0;
  config.specify_input_name = true;
@@ -267,6 +255,7 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
  PADDLE_ENFORCE(config.ir_mode ==
                 AnalysisConfig::IrPassMode::kExclude);  // default
  config.ir_passes.clear();  // Do not exclude any pass.
  int batch_size = FLAGS_batch_size;
  int num_times = FLAGS_repeat;
@@ -276,7 +265,7 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
  std::vector<PaddleTensor> input_slots;
-  DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+  DataRecord data(FLAGS_infer_data, batch_size);
  // Prepare inputs.
  PrepareInputs(&input_slots, &data, batch_size);
  std::vector<PaddleTensor> outputs, base_outputs;
@@ -306,7 +295,7 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
      threads.emplace_back([&, tid]() {
        // Each thread should have local input_slots and outputs.
        std::vector<PaddleTensor> input_slots;
-        DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+        DataRecord data(FLAGS_infer_data, batch_size);
        PrepareInputs(&input_slots, &data, batch_size);
        std::vector<PaddleTensor> outputs;
        Timer timer;
@@ -346,30 +335,29 @@ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
    ASSERT_TRUE(fuse_statis.count("fc_fuse"));
    EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
    EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
+    EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
    EXPECT_EQ(num_ops,
              13);  // After graph optimization, only 13 operators exists.
  }
 }
 // Inference with analysis and IR, easy for profiling independently.
-TEST(Analyzer, DituRNN) {
+TEST(Analyzer, rnn1) { TestRNN1Prediction(true, true, FLAGS_num_threads); }
-  TestDituRNNPrediction(true, true, FLAGS_num_threads);
-}
-// Other unit-tests of DituRNN, test different options of use_analysis,
+// Other unit tests of RNN1, covering different combinations of use_analysis,
 // activate_ir and multi-threads.
-TEST(Analyzer, DituRNN_tests) {
+TEST(Analyzer, RNN_tests) {
  int num_threads[2] = {1, 4};
  for (auto i : num_threads) {
    // Directly infer with the original model.
-    TestDituRNNPrediction(false, false, i);
+    TestRNN1Prediction(false, false, i);
    // Inference with the original model with the analysis turned on, the
    // analysis
    // module will transform the program to a data flow graph.
-    TestDituRNNPrediction(true, false, i);
+    TestRNN1Prediction(true, false, i);
    // Inference with analysis and IR. The IR module will fuse some large
    // kernels.
-    TestDituRNNPrediction(true, true, i);
+    TestRNN1Prediction(true, true, i);
  }
 }