Merge pull request #16330 from NHZlX/merge_anakin_branch_to_dev

Cherry-pick from PaddlePaddle:feature/anakin-engine: Anakin subgraph support.

Merge pull request #16330 from NHZlX/merge_anakin_branch_to_dev
Cherry-pick from PaddlePaddle:feature/anakin-engine: Anakin subgraph support.
fa1796a3 · Zhaolong Xing · GitHub · d68a02af · 953bdde0 · fa1796a3
105 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,7 @@ option(WITH_DISTRIBUTE  "Compile with distributed support"              OFF)
 option(WITH_PSLIB       "Compile with pslib support"                    OFF)
 option(WITH_CONTRIB     "Compile the third-party contributation"        OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
+# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
 option(WITH_ANAKIN      "Compile with Anakin library"                   OFF)
 option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
 option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
@@ -190,6 +191,7 @@ include(configure)          # add paddle env configuration
 if(WITH_GPU)
    include(cuda)
    include(tensorrt)
+    include(anakin_subgraph)
 endif()
 if(WITH_MKL OR WITH_MKLML)
    include(external/anakin)

--- a/cmake/anakin_subgraph.cmake
+++ b/cmake/anakin_subgraph.cmake
+# Locate a pre-built Anakin library for the Anakin subgraph engine.
+#
+# Inputs:
+#   WITH_GPU    - nothing is searched for unless this is enabled.
+#   WITH_DSO    - Anakin is only reported as found when this is enabled.
+#   ANAKIN_ROOT - cache path (default "/usr") that is searched; the
+#                 ANAKIN_ROOT environment variable is searched as well.
+# Outputs:
+#   ANAKIN_INCLUDE_DIR - directory that contains anakin_config.h.
+#   ANAKIN_LIBRARY     - full path of the Anakin shared library that was found.
+#   ANAKIN_FOUND       - ON when header and library were found and WITH_DSO is on,
+#                        OFF otherwise.
+# When ANAKIN_FOUND is ON the include/link directories and the
+# PADDLE_WITH_ANAKIN definition are added at directory scope.
+if(NOT WITH_GPU)
+    return()
+endif()
+
+set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
+find_path(ANAKIN_INCLUDE_DIR anakin_config.h
+    PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
+    $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
+    NO_DEFAULT_PATH
+)
+
+find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
+    PATHS ${ANAKIN_ROOT}
+    $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
+    NO_DEFAULT_PATH
+    DOC "Path to ANAKIN library.")
+
+# Always give ANAKIN_FOUND a definite value so that a stale value from an
+# enclosing scope can never enable the Anakin build by accident.
+set(ANAKIN_FOUND OFF)
+if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY AND WITH_DSO)
+    set(ANAKIN_FOUND ON)
+endif()
+
+if(ANAKIN_FOUND)
+    message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
+    include_directories(${ANAKIN_ROOT}/include)
+    include_directories(${ANAKIN_ROOT}/include/saber)
+    link_directories(${ANAKIN_ROOT})
+    # The header and library may have been found through $ENV{ANAKIN_ROOT}
+    # rather than the ANAKIN_ROOT cache entry, so also add the locations that
+    # find_path/find_library actually resolved.
+    get_filename_component(ANAKIN_LIBRARY_DIR "${ANAKIN_LIBRARY}" DIRECTORY)
+    include_directories(${ANAKIN_INCLUDE_DIR})
+    include_directories(${ANAKIN_INCLUDE_DIR}/saber)
+    link_directories(${ANAKIN_LIBRARY_DIR})
+    add_definitions(-DPADDLE_WITH_ANAKIN)
+endif()
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
    message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
        "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
    include_directories(${TENSORRT_INCLUDE_DIR})
+    link_directories(${TENSORRT_LIBRARY})
    add_definitions(-DPADDLE_WITH_TENSORRT)
 endif()
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -68,16 +68,22 @@ pass_library(transpose_flatten_concat_fuse_pass inference)
 pass_library(identity_scale_op_clean_pass base)
 pass_library(sync_batch_norm_pass base)
 pass_library(runtime_context_cache_pass base)
+pass_library(simplify_anakin_detection_pattern_pass inference)
+pass_library(anakin_fillconstant_elementwisemul_fuse inference)

 # There may be many transpose-flatten structures in a model, and the output of
 # these structures will be used as inputs to the concat Op. This pattern will
 # be detected by our pass. The index here represents the number of structures in the
-# pattern. We use index 3 ~ 6, because these quantities of structures are
+# pattern. We use index 2 ~ 6, because these quantities of structures are
 # common in the models.
-foreach (index RANGE 3 6)
+foreach (index RANGE 2 6)
   file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n")
 endforeach()

+foreach (index RANGE 2 6)
+   file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n")
+endforeach()
+
 if(WITH_MKLDNN)
    pass_library(mkldnn_placement_pass base mkldnn)
    pass_library(depthwise_conv_mkldnn_pass base mkldnn)

--- a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
+++ b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h"
+#include "paddle/fluid/framework/ir/graph_viz_pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Helper macros for the match handler in this file.
+// GET_IR_NODE(n) forwards to GET_IR_NODE_FROM_SUBGRAPH(n, n, pattern), so a
+// variable named `pattern` must be in scope where it is expanded.
+// GET_NODES expands GET_IR_NODE for the four nodes of the
+// fill_constant/elementwise_mul pattern; the handler afterwards uses
+// `fill_constant`, `fill_constant_out`, `elementwise_mul` and
+// `elementwise_mul_out` as local names.
+// NOTE(review): presumably the underlying macro also needs a `subgraph`
+// variable in scope -- confirm against graph_pattern_detector.h.
+#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
+#define GET_NODES                 \
+  GET_IR_NODE(fill_constant);     \
+  GET_IR_NODE(fill_constant_out); \
+  GET_IR_NODE(elementwise_mul);   \
+  GET_IR_NODE(elementwise_mul_out);
+
+// Replaces every matched `fill_constant -> elementwise_mul` pair with a single
+// `scale` op.
+//
+// The scale op reads the former X input of elementwise_mul, writes to the
+// former output variable of elementwise_mul, and uses the fill_constant
+// "value" attribute as its scale factor (bias 0, applied after scaling).
+// The fill_constant op, its output variable and the elementwise_mul op are
+// removed from the graph. Returns the (modified) graph.
+std::unique_ptr<ir::Graph> AnakinFillconstantElementwisemulFuse::ApplyImpl(
+    std::unique_ptr<ir::Graph> graph) const {
+  const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse";
+  FusePassBase::Init(pattern_name, graph.get());
+
+  GraphPatternDetector detector;
+  auto* pd_pattern = detector.mutable_pattern();
+
+  // Variable feeding the X slot of elementwise_mul; it survives the rewrite.
+  auto* mul_x = pd_pattern->NewNode("x")
+                    ->assert_is_op_input("elementwise_mul", "X")
+                    ->AsInput();
+
+  // NOTE: the name `pattern` is required by the GET_NODES macro.
+  patterns::AnakinFillConstantElementWiseMulFuse pattern(pd_pattern,
+                                                         pattern_name);
+  pattern(mul_x);
+
+  auto fuse = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                  Graph* g) {
+    GET_NODES;
+
+    PADDLE_ENFORCE(subgraph.count(mul_x));
+    auto* scaled_input = subgraph.at(mul_x);
+    const float factor =
+        boost::get<float>(fill_constant->Op()->GetAttr("value"));
+
+    // Describe the replacement op: Out = factor * X + 0.
+    framework::OpDesc scale_desc;
+    scale_desc.SetType("scale");
+    scale_desc.SetInput("X", {scaled_input->Name()});
+    scale_desc.SetOutput("Out", {elementwise_mul_out->Name()});
+    scale_desc.SetAttr("scale", factor);
+    scale_desc.SetAttr("bias", 0.0f);
+    scale_desc.SetAttr("bias_after_scale", true);
+    scale_desc.Flush();
+
+    // Materialise the op in the graph and wire it between the surviving
+    // input and output variables.
+    auto* scale_node = graph->CreateOpNode(&scale_desc);
+    IR_NODE_LINK_TO(scaled_input, scale_node);         // Input
+    IR_NODE_LINK_TO(scale_node, elementwise_mul_out);  // Output
+
+    // Drop the nodes that the scale op makes redundant.
+    GraphSafeRemoveNodes(graph.get(),
+                         {fill_constant, fill_constant_out, elementwise_mul});
+  };
+
+  detector(graph.get(), fuse);
+  return graph;
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+// Register the pass under the name "anakin_fillconstant_elementwisemul_fuse".
+REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse,
+              paddle::framework::ir::AnakinFillconstantElementwisemulFuse);
--- a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
+++ b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Fuse pass that rewrites a `fill_constant` op whose output feeds the Y input
+// of an `elementwise_mul` op into a single `scale` op (see ApplyImpl in
+// anakin_fillconstant_elementwisemul_fuse.cc).
+class AnakinFillconstantElementwisemulFuse : public FusePassBase {
+ public:
+  virtual ~AnakinFillconstantElementwisemulFuse() {}
+
+ protected:
+  // Applies the rewrite to `graph` and returns the resulting graph.
+  std::unique_ptr<ir::Graph> ApplyImpl(
+      std::unique_ptr<ir::Graph> graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1470,6 +1470,171 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
  return concat_out;
 }

+// Builds the detection pattern matched by SimplifyAnakinDetectionPatternPass.
+//
+// For every branch i in [0, times) the pattern contains
+//   conv_in[i] -> density_prior_box -> Boxes     -> reshape2 -> concat1 (X[i])
+//                                   -> Variances -> reshape2 -> concat2 (X[i])
+// followed by
+//   box_coder(concat1_out, concat2_out, conv_in[times]) -> box_coder_out
+//   transpose2(conv_in[times + 1]) -> transpose_before_nms_out
+//   multiclass_nms(box_coder_out, transpose_before_nms_out)
+//       -> multiclass_nms_out
+//
+// Args:
+//   conv_in: at least `times + 2` nodes; the first `times` entries feed the
+//            density_prior_box ops, entry `times` is the third box_coder
+//            input and entry `times + 1` is the transpose2 input.
+//   times:   number of density_prior_box branches.
+// Returns the multiclass_nms output node.
+PDNode *patterns::AnakinDetectionPattern::operator()(
+    std::vector<PDNode *> conv_in, int times) {
+  // conv_in is indexed up to `times + 1` below; fail loudly instead of
+  // reading past the end of the vector.
+  PADDLE_ENFORCE(
+      times >= 0 && conv_in.size() >= static_cast<size_t>(times) + 2,
+      "AnakinDetectionPattern needs at least %d input nodes, but got %d.",
+      times + 2, static_cast<int>(conv_in.size()));
+
+  // Layout of one branch inside `nodes`:
+  // {prior_box, box_out, reshape1, reshape1_out, box_var_out, reshape2,
+  //  reshape2_out}
+  const int kNumFields = 7;
+  const int kPriorBoxLocOffset = 1;
+  const int kReshape1Offset = 2;
+  const int kReshape1OutOffset = 3;
+  const int kPriorBoxVarOffset = 4;
+  const int kReshape2Offset = 5;
+  const int kReshape2OutOffset = 6;
+
+  const int kBoxCoderThirdInputOffset = times;
+  const int kMultiClassSecondInputNmsOffset = times + 1;
+
+  std::vector<PDNode *> nodes;
+
+  // Create the per-branch nodes; the push order must match the offsets above.
+  for (int i = 0; i < times; i++) {
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
+            ->assert_is_op("density_prior_box"));
+    nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
+                        ->assert_is_op_output("density_prior_box", "Boxes")
+                        ->assert_is_op_input("reshape2", "X")
+                        ->AsIntermediate());
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
+            ->assert_is_op("reshape2"));
+
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
+            ->assert_is_op_output("reshape2")
+            ->assert_is_op_nth_input("concat", "X", i)
+            ->AsIntermediate());
+
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
+            ->assert_is_op_output("density_prior_box", "Variances")
+            ->assert_is_op_input("reshape2", "X")
+            ->AsIntermediate());
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
+            ->assert_is_op("reshape2"));
+
+    nodes.push_back(
+        pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
+            ->assert_is_op_output("reshape2")
+            ->assert_is_op_nth_input("concat", "X", i)
+            ->AsIntermediate());
+  }
+
+  // Nodes shared by all branches.
+  auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
+                        ->assert_is_op("concat")
+                        ->assert_op_has_n_inputs("concat", times);
+  auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
+                         ->assert_is_op_output("concat")
+                         ->AsIntermediate();
+
+  auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
+                        ->assert_is_op("concat")
+                        ->assert_op_has_n_inputs("concat", times);
+  auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
+                         ->assert_is_op_output("concat")
+                         ->AsIntermediate();
+
+  auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
+                          ->assert_is_op("box_coder")
+                          ->assert_op_has_n_inputs("box_coder", 3);
+
+  auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
+                           ->assert_is_op_output("box_coder")
+                           ->AsIntermediate();
+
+  auto transpose_before_nms =
+      pattern->NewNode(GetNodeName("transpose_before_nms"))
+          ->assert_is_op("transpose2");
+
+  auto transpose_before_nms_out =
+      pattern->NewNode(GetNodeName("transpose_before_nms_out"))
+          ->assert_is_op_output("transpose2")
+          ->assert_is_op_input("multiclass_nms", "Scores")
+          ->AsIntermediate();
+
+  auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
+                               ->assert_is_op("multiclass_nms")
+                               ->assert_op_has_n_inputs("multiclass_nms", 2);
+
+  auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
+                                ->assert_is_op_output("multiclass_nms")
+                                ->AsOutput();
+
+  std::vector<PDNode *> reshape1_outs;
+  std::vector<PDNode *> reshape2_outs;
+
+  // Wire up every branch and collect the concat inputs.
+  for (int i = 0; i < times; i++) {
+    conv_in[i]->AsInput();
+    // prior_box
+    nodes[i * kNumFields]->LinksFrom({conv_in[i]});
+    // prior_box box out
+    nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
+        {nodes[i * kNumFields]});
+    // reshape of the boxes
+    nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
+        {nodes[i * kNumFields + kPriorBoxLocOffset]});
+    // reshape_out of the boxes
+    nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
+        {nodes[i * kNumFields + kReshape1Offset]});
+
+    // prior_box variances out
+    nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
+        {nodes[i * kNumFields]});
+    // reshape of the variances
+    nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
+        {nodes[i * kNumFields + kPriorBoxVarOffset]});
+    // reshape_out of the variances
+    nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
+        {nodes[i * kNumFields + kReshape2Offset]});
+
+    reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
+    reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
+  }
+
+  concat_op1->LinksFrom(reshape1_outs);
+  concat_op2->LinksFrom(reshape2_outs);
+  concat_out1->LinksFrom({concat_op1});
+  concat_out2->LinksFrom({concat_op2});
+
+  conv_in[kBoxCoderThirdInputOffset]->AsInput();
+  conv_in[kMultiClassSecondInputNmsOffset]->AsInput();
+
+  box_coder_op->LinksFrom(
+      {concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
+  box_coder_out->LinksFrom({box_coder_op});
+
+  transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
+  transpose_before_nms_out->LinksFrom({transpose_before_nms});
+
+  multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
+      .LinksTo({multiclass_nms_out});
+
+  return multiclass_nms_out;
+}
+
+// Builds the pattern
+//   fill_constant -> fill_constant_out --(Y)--> elementwise_mul
+//   elementwise_op_input --------------------->      |
+//                                                    v
+//                                           elementwise_mul_out
+// `elementwise_op_input` is supplied by the caller; the returned node is the
+// output variable of elementwise_mul.
+PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()(
+    PDNode *elementwise_op_input) {
+  // Producer of the constant operand.
+  PDNode *const_op =
+      pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");
+
+  // The constant itself: written by fill_constant, read as Y by the multiply.
+  PDNode *const_var = pattern->NewNode(fill_constant_out_repr())
+                          ->assert_is_op_output("fill_constant")
+                          ->assert_is_op_input("elementwise_mul", "Y")
+                          ->AsIntermediate();
+
+  // The multiply and its result.
+  PDNode *mul_op =
+      pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");
+
+  PDNode *mul_result = pattern->NewNode(elementwise_mul_out_repr())
+                           ->assert_is_op_output("elementwise_mul")
+                           ->AsOutput();
+
+  // Edges, in data-flow order.
+  const_var->LinksFrom({const_op});
+  mul_op->LinksFrom({elementwise_op_input, const_var});
+  mul_result->LinksFrom({mul_op});
+
+  return mul_result;
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -844,6 +844,36 @@ struct TransposeFlattenConcat : public PatternBase {
  }
 };

+// Pattern made of `times` density_prior_box branches whose reshaped outputs
+// are concatenated and fed through box_coder and multiclass_nms (built by
+// operator() in graph_pattern_detector.cc). Because the number of nodes
+// depends on `times`, nodes are addressed by string label via
+// GetNodeName/GetPDNode instead of PATTERN_DECL_NODE members.
+struct AnakinDetectionPattern : public PatternBase {
+  AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
+
+  // Builds the pattern. `conv_inputs` supplies the external input nodes and
+  // `times` is the number of density_prior_box branches. Returns the
+  // multiclass_nms output node.
+  PDNode* operator()(std::vector<PDNode*> conv_inputs, int times);
+
+  // Returns the full node name for the label `op_type` (e.g. "prior_box0",
+  // "concat1"), composed by PDNodeName from name_scope_, repr_ and id_.
+  std::string GetNodeName(const std::string& op_type) {
+    return PDNodeName(name_scope_, repr_, id_, op_type);
+  }
+
+  // Retrieves the pattern node registered under GetNodeName(op_type).
+  // NOTE(review): behaviour for an unknown label depends on
+  // PDPattern::RetrieveNode -- confirm before relying on it.
+  PDNode* GetPDNode(const std::string& op_type) {
+    return pattern->RetrieveNode(GetNodeName(op_type));
+  }
+};
+
+// Pattern: fill_constant -> fill_constant_out -> elementwise_mul (Y input)
+//          -> elementwise_mul_out.
+struct AnakinFillConstantElementWiseMulFuse : public PatternBase {
+  AnakinFillConstantElementWiseMulFuse(PDPattern* pattern,
+                                       const std::string& name_scope)
+      : PatternBase(pattern, name_scope,
+                    "anakin_fillconstant_elementwisemul_fuse") {}
+
+  // Builds the pattern; `elementwise_op_input` is linked as the other input
+  // of elementwise_mul. Returns the elementwise_mul output node.
+  PDNode* operator()(PDNode* elementwise_op_input);
+
+  // declare operator node's name
+  PATTERN_DECL_NODE(fill_constant);
+  PATTERN_DECL_NODE(fill_constant_out);
+  PATTERN_DECL_NODE(elementwise_mul);
+  PATTERN_DECL_NODE(elementwise_mul_out);
+};
+
 }  // namespace patterns

 // Link two ir::Nodes from each other.

--- a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
+++ b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/graph_viz_pass.h"
+#include "paddle/fluid/framework/ir/node.h"
+#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Rewrites each match of patterns::AnakinDetectionPattern (with `times`
+// density_prior_box branches) into a shorter subgraph:
+//   * a new concat op (axis 2) that consumes the density_prior_box "Boxes"
+//     outputs directly and writes to the original concat1 output variable;
+//   * a "detection_out" op that reads that variable (PriorBox), the third
+//     box_coder input (TargetBox) and the transpose2 input (Scores), and
+//     writes to the original multiclass_nms output variable.
+// The reshape2 ops and their outputs, the "Variances" outputs, both original
+// concat ops, concat2's output, box_coder, transpose2 and multiclass_nms are
+// removed from the graph. Returns the (modified) graph.
+template <int times>
+std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
+    std::unique_ptr<ir::Graph> graph) const {
+  const std::string pattern_name =
+      "simplify_anakin_detection_pattern_pass" + std::to_string(times);
+  FusePassBase::Init(pattern_name, graph.get());
+
+  GraphPatternDetector gpd;
+  // External inputs of the pattern: x0..x{times-1} feed the density_prior_box
+  // ops, x{times} is the box_coder TargetBox and x{times+1} feeds transpose2.
+  std::vector<PDNode *> input_nodes;
+  for (int i = 0; i < times; i++) {
+    input_nodes.push_back(gpd.mutable_pattern()
+                              ->NewNode("x" + std::to_string(i))
+                              ->assert_is_op_input("density_prior_box", "Input")
+                              ->AsInput());
+  }
+  input_nodes.push_back(gpd.mutable_pattern()
+                            ->NewNode("x" + std::to_string(times))
+                            ->assert_is_op_input("box_coder", "TargetBox")
+                            ->AsInput());
+
+  input_nodes.push_back(gpd.mutable_pattern()
+                            ->NewNode("x" + std::to_string(times + 1))
+                            ->assert_is_op_input("transpose2")
+                            ->AsInput());
+
+  patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
+  pattern(input_nodes, times);
+
+  // Called once per matched subgraph; performs the rewrite described above.
+  auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
+                     Graph *g) {
+    // Per-branch layout of `nodes`; must stay in sync with the push order in
+    // the loop below.
+    const int kNumFields = 7;
+    const int kPriorBoxLocOffset = 1;
+    const int kReshape1Offset = 2;
+    const int kReshape1OutOffset = 3;
+    const int kPriorBoxVarOffset = 4;
+    const int kReshape2Offset = 5;
+    const int kReshape2OutOffset = 6;
+    std::vector<Node *> nodes;
+
+    // Resolve the matched graph nodes of every branch.
+    for (int i = 0; i < times; i++) {
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
+
+      PADDLE_ENFORCE(
+          subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
+
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
+      nodes.push_back(
+          subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
+    }
+
+    // Resolve the nodes shared by all branches.
+    Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
+    Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
+
+    Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
+    Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
+
+    Node *box_coder_third_input = subgraph.at(input_nodes[times]);
+    Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
+    Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
+
+    Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
+    Node *transpose_before_nms =
+        subgraph.at(pattern.GetPDNode("transpose_before_nms"));
+    Node *transpose_before_nms_out =
+        subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
+
+    Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
+    Node *multiclass_nms_out =
+        subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
+
+    // Attributes carried over from box_coder and multiclass_nms to the new
+    // detection_out op.
+    std::string code_type =
+        boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
+    bool box_normalized =
+        boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
+    // auto variance =
+    // boost::get<std::vector<float>>(box_coder_op->Op()->GetAttr("variance"));
+    int background_label =
+        boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
+    float score_threshold =
+        boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
+    int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
+    float nms_threshold =
+        boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
+    float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
+    int keep_top_k =
+        boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
+
+    // The new concat reads the density_prior_box "Boxes" outputs directly,
+    // bypassing the reshape2 ops.
+    std::vector<std::string> concat1_input_names;
+    for (int i = 0; i < times; i++) {
+      concat1_input_names.push_back(
+          nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
+    }
+
+    // NOTE(review): the axis is hard-coded to 2 rather than taken from the
+    // original concat op -- confirm this is intended.
+    // int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
+    framework::OpDesc concat1_desc;
+    concat1_desc.SetType("concat");
+    concat1_desc.SetInput("X", concat1_input_names);
+    concat1_desc.SetAttr("axis", 2);
+    concat1_desc.SetOutput("Out", {concat_out1->Name()});
+
+    auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
+
+    // Link every box_out variable to the new concat op.
+    for (int i = 0; i < times; i++) {
+      nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
+          new_add_concat_op);
+      new_add_concat_op->inputs.push_back(
+          nodes[i * kNumFields + kPriorBoxLocOffset]);
+    }
+
+    // Describe the fused detection_out op.
+    framework::OpDesc new_op_desc;
+    new_op_desc.SetType("detection_out");
+    new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
+    new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
+    new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
+    new_op_desc.SetAttr("code_type", code_type);
+    new_op_desc.SetAttr("box_normalized", box_normalized);
+    new_op_desc.SetAttr("background_label", background_label);
+    new_op_desc.SetAttr("score_threshold", score_threshold);
+    new_op_desc.SetAttr("nms_top_k", nms_top_k);
+    new_op_desc.SetAttr("nms_threshold", nms_threshold);
+    new_op_desc.SetAttr("nms_eta", nms_eta);
+    new_op_desc.SetAttr("keep_top_k", keep_top_k);
+    new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
+    new_op_desc.Flush();
+
+    // Create a new node for the fused op.
+    auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
+
+    // Collect everything that becomes unreachable after the rewrite. The
+    // density_prior_box ops and their box_out variables are kept.
+    std::unordered_set<const Node *> delete_nodes;
+
+    for (int i = 0; i < times; i++) {
+      // NOTE(review): this links box_out to concat_op1, which is itself added
+      // to delete_nodes below -- looks redundant; confirm whether it is
+      // needed.
+      nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
+      delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
+      delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
+      delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
+      delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
+      delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
+    }
+
+    delete_nodes.insert(concat_op1);
+    delete_nodes.insert(concat_op2);
+    delete_nodes.insert(concat_out2);
+    delete_nodes.insert(box_coder_op);
+    delete_nodes.insert(box_coder_out);
+    delete_nodes.insert(transpose_before_nms);
+    delete_nodes.insert(transpose_before_nms_out);
+    delete_nodes.insert(multiclass_nms);
+
+    // new concat -> concat_out1
+    new_add_concat_op->outputs.push_back(concat_out1);
+    concat_out1->inputs.push_back(new_add_concat_op);
+
+    // Inputs and output of detection_out, then the reverse links.
+    detection_out_op->inputs.push_back(concat_out1);
+    detection_out_op->inputs.push_back(box_coder_third_input);
+    detection_out_op->inputs.push_back(multiclass_nms_second_input);
+    detection_out_op->outputs.push_back(multiclass_nms_out);
+
+    concat_out1->outputs.push_back(detection_out_op);
+    box_coder_third_input->outputs.push_back(detection_out_op);
+    multiclass_nms_second_input->outputs.push_back(detection_out_op);
+    multiclass_nms_out->inputs.push_back(detection_out_op);
+
+    // Delete the unneeded nodes.
+    GraphSafeRemoveNodes(graph.get(), delete_nodes);
+  };
+
+  gpd(graph.get(), handler);
+  return graph;
+}
+
+// ApplyImpl is defined in this .cc file, so every supported branch count
+// (1 to 6) is instantiated explicitly here.
+template class SimplifyAnakinDetectionPatternPass<1>;
+template class SimplifyAnakinDetectionPatternPass<2>;
+template class SimplifyAnakinDetectionPatternPass<3>;
+template class SimplifyAnakinDetectionPatternPass<4>;
+template class SimplifyAnakinDetectionPatternPass<5>;
+template class SimplifyAnakinDetectionPatternPass<6>;
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+// One registration per instantiated branch count. The single-branch variant
+// has no numeric suffix; the others are registered as
+// simplify_anakin_detection_pattern_pass<N> for N in 2..6.
+REGISTER_PASS(simplify_anakin_detection_pattern_pass,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>);
+
+REGISTER_PASS(simplify_anakin_detection_pattern_pass2,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<2>);
+
+REGISTER_PASS(simplify_anakin_detection_pattern_pass3,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>);
+
+REGISTER_PASS(simplify_anakin_detection_pattern_pass4,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>);
+
+REGISTER_PASS(simplify_anakin_detection_pattern_pass5,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>);
+
+REGISTER_PASS(simplify_anakin_detection_pattern_pass6,
+              paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>);
--- a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h
+++ b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <unordered_set>
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// A detection model may contain several density_prior_box branches whose
+// reshaped "Boxes" and "Variances" outputs are concatenated and then passed
+// through box_coder and multiclass_nms. This pass detects that structure and
+// replaces it with a single concat op followed by a detection_out op. The
+// times here represents the number of density_prior_box branches in the
+// structure.
+template <int times>
+class SimplifyAnakinDetectionPatternPass : public FusePassBase {
+ public:
+  virtual ~SimplifyAnakinDetectionPatternPass() {}
+
+ protected:
+  // Applies the rewrite to `graph` and returns the resulting graph.
+  std::unique_ptr<ir::Graph> ApplyImpl(
+      std::unique_ptr<ir::Graph> graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
@@ -12,7 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include <memory>
 #include <string>
+#include <unordered_set>
 #include <vector>

 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
@@ -123,6 +125,7 @@ std::unique_ptr<ir::Graph> TransposeFlattenConcatFusePass<times>::ApplyImpl(
 }

 template class TransposeFlattenConcatFusePass<1>;
+template class TransposeFlattenConcatFusePass<2>;
 template class TransposeFlattenConcatFusePass<3>;
 template class TransposeFlattenConcatFusePass<4>;
 template class TransposeFlattenConcatFusePass<5>;
@@ -135,6 +138,9 @@ template class TransposeFlattenConcatFusePass<6>;
 REGISTER_PASS(transpose_flatten_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<1>);

+REGISTER_PASS(transpose_flatten2_concat_fuse_pass,
+              paddle::framework::ir::TransposeFlattenConcatFusePass<2>);
+
 REGISTER_PASS(transpose_flatten3_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<3>);


--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -16,7 +16,10 @@ add_subdirectory(utils)
 if (TENSORRT_FOUND)
  add_subdirectory(tensorrt)
 endif()
-# add_subdirectory(anakin)
+
+if (ANAKIN_FOUND)
+  add_subdirectory(anakin)
+endif()

 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
 get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)

--- a/paddle/fluid/inference/anakin/CMakeLists.txt
+++ b/paddle/fluid/inference/anakin/CMakeLists.txt
-cc_library(anakin_engine SRCS engine.cc)
+cc_library(anakin_engine SRCS engine.cc DEPS framework_proto)
+cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto)
 target_link_libraries(anakin_engine anakin anakin_saber_common)
 cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
 add_subdirectory(convert)
--- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt
-cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
-cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
+# Library holding one source file per fluid -> Anakin operator converter.
+cc_library(anakin_op_converter
+  SRCS
+    fc.cc
+    conv2d.cc
+    conv2d_fusion.cc
+    elementwise.cc
+    activation.cc
+    pool2d.cc
+    concat.cc
+    split.cc
+    relu.cc
+    softmax.cc
+    batch_norm.cc
+    reshape.cc
+    flatten.cc
+    transpose.cc
+    density_prior_box.cc
+    detection_out.cc
+    scale.cc
+    dropout.cc
+    im2sequence.cc
+    sum.cc
+  DEPS
+    anakin_engine
+    framework_proto
+    scope
+    op_registry)
+
+# Unit tests for the individual converters. Each enabled test is registered
+# with the SERIAL option exactly once.
+cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
+cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
+cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL)
+cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling SERIAL)
+cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split SERIAL)
+cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split SERIAL)
+cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op SERIAL)
+cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)
+cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax SERIAL)
+cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op SERIAL)
+cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op SERIAL)
+cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL)
+cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL)
+cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL)
+#cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
+cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS  anakin_op_converter sum_op selected_rows_functor SERIAL)
--- a/paddle/fluid/inference/anakin/convert/activation.cc
+++ b/paddle/fluid/inference/anakin/convert/activation.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/activation.h"
+#include <algorithm>
+#include <map>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Stores the fluid op type and resolves the Anakin activation name registered
+// for it in anakin_op_types_. The enforce fails when `op_type` has no entry.
+ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
+    : op_type_(op_type) {
+  const auto matched = anakin_op_types_.find(op_type_);
+  const bool is_known = matched != anakin_op_types_.end();
+  PADDLE_ENFORCE(is_known, "activation op type is not support");
+  anakin_op_type_ = matched->second;
+}
+
+// Adds a single Anakin "Activation" op to the engine for the given fluid op.
+//
+// The op must have exactly one "X" input and one "Out" output. The Anakin op
+// is named "<fluid type>:<output name>" and its "type" attribute is the
+// Anakin activation name resolved by the constructor. `scope` and `test_mode`
+// are not used by this converter.
+void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
+                                       const framework::Scope &scope,
+                                       bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  // size() is unsigned; use 1UL like the other converters so both sides of
+  // the comparison have the same signedness.
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
+
+  auto input_name = op_desc.Input("X").front();
+  auto output_name = op_desc.Output("Out").front();
+  auto op_name = op_desc.Type() + ":" + output_name;
+  engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
+  engine_->AddOpAttr(op_name, "type", anakin_op_type_);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
--- a/paddle/fluid/inference/anakin/convert/registrar.h
+++ b/paddle/fluid/inference/anakin/convert/registrar.h
@@ -14,45 +14,39 @@

 #pragma once

-#include <functional>
 #include <map>
-#include <memory>
 #include <string>
-#include <utility>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-class AnakinOpConverter;
-
-class OpRegister {
+class ActivationOpConverter : public AnakinOpConverter {
 public:
-  OpRegister() = default;
-  std::shared_ptr<AnakinOpConverter> Get(const std::string &name);
-  static OpRegister *instance();
-  void OpRegisterFn(const std::string &name,
-                    std::function<std::shared_ptr<AnakinOpConverter>()> fn) {
-    registry_[name] = fn;
-  }
+  explicit ActivationOpConverter(const std::string &op_type);
+
+  virtual void operator()(const framework::proto::OpDesc &op,
+                          const framework::Scope &scope,
+                          bool test_mode) override;
+  virtual ~ActivationOpConverter() {}

 private:
-  using RegisterFnType = std::function<std::shared_ptr<AnakinOpConverter>()>;
-  std::map<std::string, std::function<std::shared_ptr<AnakinOpConverter>()>>
-      registry_;
+  std::string op_type_;
+  std::string anakin_op_type_;
+  std::map<std::string, std::string> anakin_op_types_{{"tanh", "TanH"},
+                                                      {"sigmoid", "Sigmoid"}};
 };

-template <typename T, typename... Args>
-class Registrar {
+class TanhOpConverter : public ActivationOpConverter {
 public:
-  Registrar(const std::string &name, Args... args) {
-    std::shared_ptr<AnakinOpConverter> converter =
-        std::make_shared<T>(std::move(args)...);
-    OpRegister::instance()->OpRegisterFn(name,
-                                         [converter]() { return converter; });
-  }
+  TanhOpConverter() : ActivationOpConverter("tanh") {}
 };

+class SigmoidOpConverter : public ActivationOpConverter {
+ public:
+  SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
+};
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/batch_norm.cc
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
+#include <math.h>
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid batch_norm op into two chained Anakin ops:
+//   1. "BatchNorm" ("<op_name>:bn"), fed with X and carrying the mean
+//      (weight_1), the variance (weight_2) and a scalar 1 (weight_3);
+//   2. "Scale" ("<op_name>:scale"), fed with the BatchNorm output and carrying
+//      the Scale (weight_1) and Bias (weight_2) tensors.
+// where <op_name> is "<fluid type>:<Y output name>".
+//
+// The op must have exactly one Y output and exactly one of each of the inputs
+// X, Scale, Bias, Mean and Variance; the four parameter tensors are looked up
+// in `scope`. `test_mode` is not used by this converter.
+void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
+                                      const framework::Scope &scope,
+                                      bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  // size() is unsigned; compare against 1UL as the input checks below do.
+  PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1UL);
+  std::map<std::string, std::string> inputs;
+  for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
+    PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
+    auto v = op_desc.Input(k).front();
+    inputs.insert({k, v});
+  }
+
+  auto output = op_desc.Output("Y").front();
+  auto op_name = op_desc.Type() + ":" + output;
+  auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
+
+  auto bn_op_name = op_name + ":bn";
+  auto bn_output = bn_op_name + "_output";
+  engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
+  engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
+  // The fluid "momentum" attribute is not forwarded; a fixed 1.0 is used.
+  engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
+
+  auto scale_op_name = op_name + ":scale";
+  // Copies the variable `var_name` from `scope` into `tensor` on the CPU.
+  // Only `scope` is needed inside the lambda, so nothing else is captured.
+  auto get_lod_tensor = [&scope](const std::string &var_name,
+                                 framework::LoDTensor *tensor) {
+    auto *v = scope.FindVar(var_name);
+    PADDLE_ENFORCE_NOT_NULL(v);
+    auto *t = v->GetMutable<framework::LoDTensor>();
+    tensor->Resize(t->dims());
+    TensorCopySync(*t, platform::CPUPlace(), tensor);
+  };
+
+  framework::LoDTensor bias_t;
+  framework::LoDTensor mean_t;
+  framework::LoDTensor scale_t;
+  framework::LoDTensor variance_t;
+  get_lod_tensor(inputs["Bias"], &bias_t);
+  get_lod_tensor(inputs["Mean"], &mean_t);
+  get_lod_tensor(inputs["Scale"], &scale_t);
+  get_lod_tensor(inputs["Variance"], &variance_t);
+
+  // Prepends a 1 to `shape` and then appends 1s until it has `n` entries,
+  // e.g. {C} becomes {1, C, 1, 1} for n == 4.
+  auto fill_shape = [](size_t n, std::vector<int> shape) {
+    shape.insert(shape.begin(), 1);
+    if (shape.size() < n) {
+      shape.insert(shape.end(), n - shape.size(), 1);
+    }
+    return shape;
+  };
+  Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
+  Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
+  auto *weight1 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
+  auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
+  std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
+  engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
+
+  auto *weight2 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape2);
+  auto *variance_data =
+      static_cast<float *>(weight2->h_tensor().mutable_data());
+  std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
+  engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
+
+  // weight_3 is a single-element block holding the constant 1.
+  Shape shape3(std::vector<int>({1, 1, 1, 1}));
+  auto *weight3 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape3);
+  auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
+  float weight3_data[] = {1};
+  std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
+  engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
+
+  Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
+  auto *scale =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(scale_shape);
+  auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
+  std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
+
+  Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
+  auto *bias =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
+  auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
+  std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
+
+  engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
+  engine_->AddOpAttr(scale_op_name, "axis", 1);
+  engine_->AddOpAttr(scale_op_name, "num_axes", 1);
+  engine_->AddOpAttr(scale_op_name, "bias_term", true);
+  engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
+  engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
--- a/paddle/fluid/inference/anakin/convert/batch_norm.h
+++ b/paddle/fluid/inference/anakin/convert/batch_norm.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converter that is registered for the fluid batch_norm operator; the
+// conversion itself lives in operator(), defined in the matching source file.
+class BatchNormOpConverter : public AnakinOpConverter {
+ public:
+  BatchNormOpConverter() = default;
+  virtual ~BatchNormOpConverter() = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/concat.cc
+++ b/paddle/fluid/inference/anakin/convert/concat.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/concat.h"
+#include <algorithm>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Maps a fluid concat op onto an Anakin "Concat" op named
+// "<fluid type>:<Out name>", forwarding every "X" input and the integer
+// "axis" attribute as they are. `scope` and `test_mode` are not used.
+void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
+                                   const framework::Scope &scope,
+                                   bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+
+  // The axis value is passed through without any range check.
+  const int concat_axis = boost::get<int>(op_desc.GetAttr("axis"));
+  auto in_names = op_desc.Input("X");
+
+  auto out_name = op_desc.Output("Out").front();
+  auto anakin_op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
+
+  engine_->AddOp(anakin_op_name, "Concat", in_names, {out_name});
+  engine_->AddOpAttr(anakin_op_name, "axis", concat_axis);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
--- a/paddle/fluid/inference/anakin/convert/concat.h
+++ b/paddle/fluid/inference/anakin/convert/concat.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converter that is registered for the fluid concat operator; the conversion
+// itself lives in operator(), defined in the matching source file.
+class ConcatOpConverter : public AnakinOpConverter {
+ public:
+  ConcatOpConverter() = default;
+  virtual ~ConcatOpConverter() = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/conv2d.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/conv2d.h"
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid conv2d op (one Input, one Filter, one Output) into an
+// Anakin "Convolution" op without a bias term. The 4-D filter is read from
+// `scope`, copied to the CPU and attached to the op as "weight_1".
+// `test_mode` is not used by this converter.
+void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op,
+                                   const framework::Scope &scope,
+                                   bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
+
+  auto src_name = op_desc.Input("Input").front();
+  auto dst_name = op_desc.Output("Output").front();
+  auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
+  engine_->AddOp(op_name, "Convolution", {src_name}, {dst_name});
+
+  // Fetch the filter variable and take a CPU-side copy of it.
+  auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
+  PADDLE_ENFORCE_NOT_NULL(filter_v);
+  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
+  std::unique_ptr<framework::LoDTensor> weight_tensor(
+      new framework::LoDTensor());
+  weight_tensor->Resize(filter_t->dims());
+  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
+
+  PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
+
+  // Reads an integer-list attribute of the fluid op by name.
+  auto int_list_attr = [&op_desc](const char *attr_name) {
+    return boost::get<std::vector<int>>(op_desc.GetAttr(attr_name));
+  };
+
+  // dims() is indexed as [0] = number of filters, [2] = kernel height,
+  // [3] = kernel width.
+  const auto &filter_dims = weight_tensor->dims();
+  const int kernel_h = filter_dims[2];
+  const int kernel_w = filter_dims[3];
+  engine_->AddOpAttr<int>(op_name, "filter_num", filter_dims[0]);
+  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {kernel_h, kernel_w});
+  engine_->AddOpAttr<PTuple<int>>(op_name, "strides",
+                                  int_list_attr("strides"));
+  engine_->AddOpAttr<PTuple<int>>(op_name, "padding",
+                                  int_list_attr("paddings"));
+  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
+                                  int_list_attr("dilations"));
+  const int group_count = boost::get<int>(op_desc.GetAttr("groups"));
+  engine_->AddOpAttr(op_name, "group", group_count);
+  engine_->AddOpAttr(op_name, "axis", 1);
+  engine_->AddOpAttr(op_name, "bias_term", false);
+
+  // Fill the h_tensor of a new Anakin block with the filter values, then
+  // mirror it into the block's d_tensor before attaching it to the op.
+  Shape anakin_shape(framework::vectorize2int(filter_t->dims()));
+  auto *filter_block =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+  auto *host_values =
+      static_cast<float *>(filter_block->h_tensor().mutable_data());
+  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(),
+              host_values);
+  filter_block->d_tensor().set_shape(anakin_shape);
+  filter_block->d_tensor().copy_from(filter_block->h_tensor());
+  engine_->AddOpAttr(op_name, "weight_1", *filter_block);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
--- a/paddle/fluid/inference/anakin/convert/registrar.cc
+++ b/paddle/fluid/inference/anakin/convert/registrar.cc
@@ -12,22 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/inference/anakin/convert/registrar.h"
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"

 namespace paddle {
 namespace inference {
 namespace anakin {

-std::shared_ptr<AnakinOpConverter> OpRegister::Get(const std::string &name) {
-  auto it = registry_.find(name);
-  if (it == registry_.end()) return nullptr;
-  return it->second();
-}
+class Conv2dOpConverter : public AnakinOpConverter {
+ public:
+  Conv2dOpConverter() = default;

-OpRegister *OpRegister::instance() {
-  static OpRegister factory;
-  return &factory;
-}
+  virtual void operator()(const framework::proto::OpDesc &op,
+                          const framework::Scope &scope,
+                          bool test_mode) override;
+  virtual ~Conv2dOpConverter() {}
+};

 }  // namespace anakin
 }  // namespace inference

--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid conv2d_fusion op into an Anakin "Convolution" op that has
+// a bias term.
+//
+// The op must have exactly one Input, Filter, Bias and Output. The filter and
+// bias variables are looked up in `scope`, copied to the CPU and attached to
+// the Anakin op as "weight_1" and "weight_2" respectively. `test_mode` is not
+// used by this converter.
+void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op,
+                                         const framework::Scope &scope,
+                                         bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
+
+  // The Anakin op is named "<fluid type>:<output name>".
+  auto input_name = op_desc.Input("Input").front();
+  auto output_name = op_desc.Output("Output").front();
+  auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
+  engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
+
+  auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
+  PADDLE_ENFORCE_NOT_NULL(filter_v);
+  auto *filter_t = filter_v->GetMutable<framework::LoDTensor>();
+
+  auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
+  PADDLE_ENFORCE_NOT_NULL(b_v);
+  auto *b_t = b_v->GetMutable<framework::LoDTensor>();
+
+  // CPU-side copy of the filter, used below for its dims and its values.
+  std::unique_ptr<framework::LoDTensor> weight_tensor(
+      new framework::LoDTensor());
+  weight_tensor->Resize(filter_t->dims());
+  TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get());
+
+  PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
+
+  // const int n_output = weight_tensor->dims()[0];
+  // const int n_input = weight_tensor->dims()[1];
+  const int filter_h = weight_tensor->dims()[2];
+  const int filter_w = weight_tensor->dims()[3];
+  // auto filter_num = n_input * filter_h * filter_w ;
+  // filter_num is the first filter dimension.
+  auto filter_num = weight_tensor->dims()[0];
+  engine_->AddOpAttr<int>(op_name, "filter_num", filter_num);
+  engine_->AddOpAttr<PTuple<int>>(op_name, "kernel_size", {filter_h, filter_w});
+  auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
+  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
+  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
+  engine_->AddOpAttr<PTuple<int>>(op_name, "dilation_rate", dilations);
+  const int groups = boost::get<int>(op_desc.GetAttr("groups"));
+  engine_->AddOpAttr(op_name, "group", groups);
+  engine_->AddOpAttr(op_name, "axis", 1);
+  engine_->AddOpAttr(op_name, "bias_term", true);
+
+  // Filter block: write the values into h_tensor, then copy h_tensor into
+  // d_tensor (presumably the host and device sides of the block -- confirm
+  // against the Anakin PBlock API).
+  auto weight_shape = framework::vectorize2int(filter_t->dims());
+  Shape anakin_shape(weight_shape);
+  auto *weight1 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
+  std::copy_n(weight_tensor->data<float>(), weight_tensor->numel(), cpu_data);
+  weight1->d_tensor().set_shape(anakin_shape);
+  weight1->d_tensor().copy_from(weight1->h_tensor());
+  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+
+  // Bias block: the bias dims get three leading 1s prepended before the
+  // Anakin shape is built, e.g. {C} becomes {1, 1, 1, C}.
+  auto bias_shape = framework::vectorize2int(b_t->dims());
+  framework::LoDTensor bias_tensor;
+  bias_tensor.Resize(b_t->dims());
+  TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
+  auto *bias_data = bias_tensor.data<float>();
+  bias_shape.insert(bias_shape.begin(), 1);
+  bias_shape.insert(bias_shape.begin(), 1);
+  bias_shape.insert(bias_shape.begin(), 1);
+  // bias_shape.push_back(1);
+  // bias_shape.push_back(1);
+  Shape anakin_bias_shape(bias_shape);
+
+  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
+      anakin_bias_shape);
+  float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
+  std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
+  weight2->d_tensor().set_shape(anakin_bias_shape);
+  weight2->d_tensor().copy_from(weight2->h_tensor());
+  engine_->AddOpAttr(op_name, "weight_2", *weight2);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
--- a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converter that is registered for the fluid conv2d_fusion operator; the
+// conversion itself lives in operator(), defined in the matching source file.
+class Conv2dFusionOpConverter : public AnakinOpConverter {
+ public:
+  Conv2dFusionOpConverter() = default;
+  virtual ~Conv2dFusionOpConverter() = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/density_prior_box.cc
+++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/density_prior_box.h"
+#include <algorithm>
+#include <map>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid density_prior_box op into an Anakin "PriorBox" op named
+// "<fluid type>:<Boxes name>", fed with the Input and Image variables.
+//
+// Forwarded from the fluid op: fixed_sizes, fixed_ratios, densities (as
+// floats), variances, step_h, step_w and offset. min_size, max_size and
+// aspect_ratio are sent empty, is_flip/is_clip are sent as false and
+// img_h/img_w as 0. `scope` and `test_mode` are not used by this converter.
+void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
+                                            const framework::Scope& scope,
+                                            bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  // Validate arity before calling front(), as the other converters do.
+  PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Image").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Boxes").size(), 1UL);
+
+  auto input_name = op_desc.Input("Input").front();
+  auto image_name = op_desc.Input("Image").front();
+  auto output_name = op_desc.Output("Boxes").front();
+
+  auto op_name = op_desc.Type() + ":" + output_name;
+
+  auto fixed_sizes =
+      boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
+  auto fixed_ratios =
+      boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
+  auto densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
+  // The "density" attribute is sent as a float tuple; convert element-wise.
+  std::vector<float> dens(densities.begin(), densities.end());
+
+  // lack flip
+  // NOTE(review): the fluid "clip" attribute is not read; is_clip is always
+  // sent as false below -- confirm this is intended.
+  auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));
+
+  // lack img_h, img_w
+  auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
+  auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
+  auto offset = boost::get<float>(op_desc.GetAttr("offset"));
+  PTuple<std::string> t_order;
+  t_order.push_back("MIN");
+  t_order.push_back("COM");
+  t_order.push_back("MAX");
+
+  std::vector<float> temp_v = {};
+
+  engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
+  engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", temp_v);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", temp_v);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", temp_v);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
+  engine_->AddOpAttr(op_name, "is_flip", false);
+  engine_->AddOpAttr(op_name, "is_clip", false);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
+  engine_->AddOpAttr(op_name, "img_h", 0);
+  engine_->AddOpAttr(op_name, "img_w", 0);
+  engine_->AddOpAttr(op_name, "step_h", step_h);
+  engine_->AddOpAttr(op_name, "step_w", step_w);
+  engine_->AddOpAttr(op_name, "offset", offset);
+  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
--- a/paddle/fluid/inference/anakin/convert/density_prior_box.h
+++ b/paddle/fluid/inference/anakin/convert/density_prior_box.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for DensityPriorBox. Only the call
+// operator is overridden; its definition lives out of line.
+class DensityPriorBoxOpConverter : public AnakinOpConverter {
+ public:
+  DensityPriorBoxOpConverter() = default;
+  ~DensityPriorBoxOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/detection_out.cc
+++ b/paddle/fluid/inference/anakin/convert/detection_out.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/detection_out.h"
+#include <algorithm>
+#include <map>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid `detection_out` op into an Anakin "DetectionOutput" op.
+//
+// Reads the TargetBox / PriorBox / Scores inputs and the Out output from the
+// op description, forwards the NMS-related attributes, and maps the fluid
+// `code_type` attribute to Anakin's naming. Only "decode_center_size" is
+// supported; any other value raises via PADDLE_THROW instead of silently
+// passing an empty code type to the engine.
+void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
+                                         const framework::Scope &scope,
+                                         bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  auto target_name = op_desc.Input("TargetBox").front();
+  auto prior_box_name = op_desc.Input("PriorBox").front();
+  auto scores_name = op_desc.Input("Scores").front();
+  auto output_name = op_desc.Output("Out").front();
+
+  // The engine-side op name is "<op type>:<first output name>".
+  auto op_name = op_desc.Type() + ":" + output_name;
+
+  auto code_type = boost::get<std::string>(op_desc.GetAttr("code_type"));
+  auto background_label = boost::get<int>(op_desc.GetAttr("background_label"));
+  auto score_threshold = boost::get<float>(op_desc.GetAttr("score_threshold"));
+  auto nms_top_k = boost::get<int>(op_desc.GetAttr("nms_top_k"));
+  auto nms_threshold = boost::get<float>(op_desc.GetAttr("nms_threshold"));
+  auto nms_eta = boost::get<float>(op_desc.GetAttr("nms_eta"));
+  auto keep_top_k = boost::get<int>(op_desc.GetAttr("keep_top_k"));
+
+  // Translate the fluid code type; reject everything that has no mapping.
+  std::string anakin_code_type;
+  if (code_type == "decode_center_size") {
+    anakin_code_type = "CENTER_SIZE";
+  } else if (code_type == "encode_center_size") {
+    PADDLE_THROW(
+        "Not support encode_center_size code_type in DetectionOut of anakin");
+  } else {
+    PADDLE_THROW("Unknown code_type [%s] in DetectionOut of anakin",
+                 code_type);
+  }
+
+  // Note the input order handed to Anakin: target, scores, prior box.
+  engine_->AddOp(op_name, "DetectionOutput",
+                 {target_name, scores_name, prior_box_name}, {output_name});
+  engine_->AddOpAttr(op_name, "share_location", true);
+  engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
+  engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
+  engine_->AddOpAttr(op_name, "background_id", background_label);
+  engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
+  engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
+  engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
+  engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
+  engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
+  engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
--- a/paddle/fluid/inference/anakin/convert/detection_out.h
+++ b/paddle/fluid/inference/anakin/convert/detection_out.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for DetectionOut. Only the call operator
+// is overridden; its definition lives out of line.
+class DetectionOutOpConverter : public AnakinOpConverter {
+ public:
+  DetectionOutOpConverter() = default;
+  ~DetectionOutOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/dropout.cc
+++ b/paddle/fluid/inference/anakin/convert/dropout.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/dropout.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Emits an Anakin "Scale" op for a fluid dropout op. The single scale weight
+// is (1 - dropout_prob); no bias term is used.
+void DropoutOpConverter::operator()(const framework::proto::OpDesc &op,
+                                    const framework::Scope &scope,
+                                    bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Output("Mask").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);
+
+  auto in_var = desc.Input("X").front();
+  auto out_var = desc.Output("Out").front();
+  // Engine-side op name: "<op type>:<first output name>".
+  auto layer_name = desc.Type() + ":" + desc.Output("Out").front();
+
+  engine_->AddOp(layer_name, "Scale", {in_var}, {out_var});
+
+  auto drop_ratio = boost::get<float>(desc.GetAttr("dropout_prob"));
+  float keep_ratio = 1 - drop_ratio;
+
+  // One-element block; the factor is written through its host tensor.
+  Shape unit_shape(std::vector<int>({1, 1, 1, 1}));
+  auto *scale_block =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(unit_shape);
+  auto *host_ptr =
+      static_cast<float *>(scale_block->h_tensor().mutable_data());
+  *host_ptr = keep_ratio;
+
+  engine_->AddOpAttr(layer_name, "weight_1", *scale_block);
+  engine_->AddOpAttr(layer_name, "axis", 0);
+  engine_->AddOpAttr(layer_name, "num_axes", 0);
+  engine_->AddOpAttr(layer_name, "bias_term", false);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
--- a/paddle/fluid/inference/anakin/convert/dropout.h
+++ b/paddle/fluid/inference/anakin/convert/dropout.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for dropout. Only the call operator is
+// overridden; its definition lives out of line.
+class DropoutOpConverter : public AnakinOpConverter {
+ public:
+  DropoutOpConverter() = default;
+  ~DropoutOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/elementwise.cc
+++ b/paddle/fluid/inference/anakin/convert/elementwise.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/elementwise.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Emits an Anakin "Eltwise" op of type "Add" with coefficients {1, 1} for a
+// fluid elementwise_add op that has exactly one X, one Y and one Out.
+void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op,
+                                           const framework::Scope &scope,
+                                           bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Input("Y").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);
+
+  auto lhs = desc.Input("X").front();
+  auto rhs = desc.Input("Y").front();
+  auto result = desc.Output("Out").front();
+  // Engine-side op name: "<op type>:<first output name>".
+  auto layer_name = desc.Type() + ":" + desc.Output("Out").front();
+
+  engine_->AddOp(layer_name, "Eltwise", {lhs, rhs}, {result});
+
+  std::string eltwise_kind = "Add";
+  std::vector<float> weights = {1.0, 1.0};
+  engine_->AddOpAttr<std::string>(layer_name, "type", eltwise_kind);
+  engine_->AddOpAttr<PTuple<float>>(layer_name, "coeff", weights);
+}
+
+// Emits an Anakin "Scale" op that takes both X and Y as inputs for a fluid
+// elementwise_mul op. "weight_1" only receives a one-element placeholder
+// block; the fluid `axis` attribute is forwarded unchanged.
+void ElementwiseMulOpConverter::operator()(const framework::proto::OpDesc &op,
+                                           const framework::Scope &scope,
+                                           bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Input("Y").size(), 1);
+  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);
+
+  auto lhs = desc.Input("X").front();
+  auto rhs = desc.Input("Y").front();
+  auto result = desc.Output("Out").front();
+  // Engine-side op name: "<op type>:<first output name>".
+  auto layer_name = desc.Type() + ":" + desc.Output("Out").front();
+
+  engine_->AddOp(layer_name, "Scale", {lhs, rhs}, {result});
+
+  // Placeholder weight: a single 1 written through the block's host tensor.
+  Shape unit_shape(std::vector<int>({1, 1, 1, 1}));
+  auto *placeholder =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(unit_shape);
+  auto *host_ptr =
+      static_cast<float *>(placeholder->h_tensor().mutable_data());
+  *host_ptr = 1;
+  engine_->AddOpAttr(layer_name, "weight_1", *placeholder);
+
+  auto scale_axis = boost::get<int>(desc.GetAttr("axis"));
+  engine_->AddOpAttr(layer_name, "axis", scale_axis);
+  engine_->AddOpAttr(layer_name, "num_axes", 1);
+  engine_->AddOpAttr(layer_name, "bias_term", false);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
--- a/paddle/fluid/inference/anakin/convert/elementwise.h
+++ b/paddle/fluid/inference/anakin/convert/elementwise.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for elementwise_add. Only the call
+// operator is overridden; its definition lives out of line.
+class ElementwiseAddOpConverter : public AnakinOpConverter {
+ public:
+  ElementwiseAddOpConverter() = default;
+  ~ElementwiseAddOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+// AnakinOpConverter implementation for elementwise_mul. Only the call
+// operator is overridden; its definition lives out of line.
+class ElementwiseMulOpConverter : public AnakinOpConverter {
+ public:
+  ElementwiseMulOpConverter() = default;
+  ~ElementwiseMulOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/fc.cc
+++ b/paddle/fluid/inference/anakin/convert/fc.cc
@@ -14,60 +14,108 @@

 #include "paddle/fluid/inference/anakin/convert/fc.h"
 #include <algorithm>
+#include <string>
+#include <vector>

 using anakin::graph::GraphGlobalMem;
 using anakin::AK_FLOAT;
-using anakin::Precision;
 using anakin::saber::NV;
-using anakin::saber::X86;
 using anakin::saber::Shape;
-using anakin::PBlock;
-using anakin::PTuple;

 namespace paddle {
 namespace inference {
 namespace anakin {

-void FcOpConverter::operator()(const framework::proto::OpDesc &op,
-                               const framework::Scope &scope, bool test_mode) {
+void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op,
+                                   const framework::Scope &scope,
+                                   bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
-  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
-  PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
-  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+  auto input_names = op_desc.InputNames();
+  bool with_bias = input_names.size() == 3;
+
+  std::string w_name = "Y";
+  std::string i_name = "X";
+  if (with_bias) {
+    w_name = "W";
+    i_name = "Input";
+  }

-  auto x_name = op_desc.Input("X").front();
  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
-  auto *y_v = scope.FindVar(op_desc.Input("Y").front());
+
+  // Look up the weight variable (slot "W" or "Y", chosen above) in the scope.
+  auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
  PADDLE_ENFORCE_NOT_NULL(y_v);
  auto *y_t = y_v->GetMutable<framework::LoDTensor>();

-  auto input_name = op_desc.Input("X").front();
+  auto input_name = op_desc.Input(i_name).front();
  auto output_name = op_desc.Output("Out").front();

-  auto weight_shape = framework::vectorize2int(y_t->dims());
  engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
-  engine_->AddOpAttr(op_name, "bias_term", false);
+  engine_->AddOpAttr(op_name, "bias_term", with_bias);
  engine_->AddOpAttr(op_name, "axis", 1);
+
+  auto weight_shape = framework::vectorize2int(y_t->dims());
  int out_dim = weight_shape[1];
  engine_->AddOpAttr(op_name, "out_dim", out_dim);
+  const int w_m = weight_shape[0];
+  const int w_k = weight_shape[1];

-  weight_shape.push_back(1);
-  weight_shape.push_back(1);
+  if (weight_shape.size() < 4UL) {
+    weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
+  }
  Shape anakin_shape(weight_shape);

  framework::LoDTensor weight_tensor;
  weight_tensor.Resize(y_t->dims());
  TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
+  auto *weight_data = weight_tensor.data<float>();
+  PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel());

+  std::vector<float> trans_weight_data(weight_tensor.numel());
+  for (int i = 0; i < w_m; i++) {  // transpose w_m x w_k -> w_k x w_m
+    for (int j = 0; j < w_k; j++) {
+      trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
+    }
+  }
  auto *weight1 =
      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
+  std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data);
  weight1->d_tensor().set_shape(anakin_shape);
  weight1->d_tensor().copy_from(weight1->h_tensor());
  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+
+  // With a bias input: copy it to host and register it as "weight_2".
+  if (with_bias) {
+    auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
+    PADDLE_ENFORCE_NOT_NULL(b_v);
+    auto *b_t = b_v->GetMutable<framework::LoDTensor>();
+
+    auto bias_shape = framework::vectorize2int(b_t->dims());
+    framework::LoDTensor bias_tensor;
+    bias_tensor.Resize(b_t->dims());
+    TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor);
+    auto *bias_data = bias_tensor.data<float>();
+    bias_shape.insert(bias_shape.begin(), 1);
+    bias_shape.insert(bias_shape.begin(), 1);
+    bias_shape.insert(bias_shape.begin(), 1);
+    // The bias dims now carry three leading 1s (prepended, not appended)
+    // before the Anakin shape is built from them.
+    Shape anakin_bias_shape(bias_shape);
+
+    auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
+        anakin_bias_shape);
+    float *cpu_data2 = static_cast<float *>(weight2->h_tensor().mutable_data());
+    std::copy_n(bias_data, bias_tensor.numel(), cpu_data2);
+    weight2->d_tensor().set_shape(anakin_bias_shape);
+    weight2->d_tensor().copy_from(weight2->h_tensor());
+    engine_->AddOpAttr(op_name, "weight_2", *weight2);
+  }
 }

 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
--- a/paddle/fluid/inference/anakin/convert/fc.h
+++ b/paddle/fluid/inference/anakin/convert/fc.h
@@ -20,19 +20,28 @@ namespace paddle {
 namespace inference {
 namespace anakin {

-class FcOpConverter : public AnakinOpConverter {
+class FcBaseOpConverter : public AnakinOpConverter {
 public:
-  FcOpConverter() = default;
+  FcBaseOpConverter() = default;

  virtual void operator()(const framework::proto::OpDesc &op,
                          const framework::Scope &scope,
                          bool test_mode) override;
-  virtual ~FcOpConverter() {}
+  virtual ~FcBaseOpConverter() {}
+};

- private:
+// Registered for the `fc` op; the base converter adds the bias when present.
+class FcOpConverter : public FcBaseOpConverter {
+ public:
+  FcOpConverter() = default;
+};
+
+// Registered for the `mul` op; shares the base conversion (no bias input).
+class MulOpConverter : public FcBaseOpConverter {
+ public:
+  MulOpConverter() = default;
 };

-static Registrar<FcOpConverter> register_fc_op_converter("fc");
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/flatten.cc
+++ b/paddle/fluid/inference/anakin/convert/flatten.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/flatten.h"
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Emits an Anakin "Reshape" op with dims {0, -1, 1, 1} for a fluid flatten
+// op. Only axis == 1 is accepted; other values fail the enforce below.
+void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
+                                    const framework::Scope &scope,
+                                    bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1UL);
+  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1UL);
+
+  auto in_var = desc.Input("X").front();
+  auto out_var = desc.Output("Out").front();
+
+  int flatten_axis = boost::get<int>(desc.GetAttr("axis"));
+  PADDLE_ENFORCE(flatten_axis == 1,
+                 "the anakin flatten op converter now only support aixs == 1.");
+
+  // Engine-side op name: "<op type>:<first output name>".
+  auto layer_name = desc.Type() + ":" + desc.Output("Out").front();
+  std::vector<int> target_dims = {0, -1, 1, 1};
+  engine_->AddOp(layer_name, "Reshape", {in_var}, {out_var});
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "dims", target_dims);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
--- a/paddle/fluid/inference/anakin/convert/flatten.h
+++ b/paddle/fluid/inference/anakin/convert/flatten.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for flatten. Only the call operator is
+// overridden; its definition lives out of line.
+class FlattenOpConverter : public AnakinOpConverter {
+ public:
+  FlattenOpConverter() = default;
+  ~FlattenOpConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/im2sequence.cc
+++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Emits an Anakin "Im2Sequence" op. paddings/strides/kernels come from the
+// fluid op attributes (kernels is forwarded as "window_size"); dilations are
+// fixed to {1, 1}.
+void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op,
+                                      const framework::Scope &scope,
+                                      bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(desc.Input("X").size(), 1);
+  // NOTE(review): "Y" is queried as an *output* slot here and must be empty;
+  // confirm this is the intended slot.
+  PADDLE_ENFORCE_EQ(desc.Output("Y").size(), 0);
+  PADDLE_ENFORCE_EQ(desc.Output("Out").size(), 1);
+
+  auto in_var = desc.Input("X").front();
+  auto out_var = desc.Output("Out").front();
+  // Engine-side op name: "<op type>:<first output name>".
+  auto layer_name = desc.Type() + ":" + desc.Output("Out").front();
+
+  engine_->AddOp(layer_name, "Im2Sequence", {in_var}, {out_var});
+
+  std::vector<int> unit_dilations = {1, 1};
+  auto pad_sizes = boost::get<std::vector<int>>(desc.GetAttr("paddings"));
+  auto stride_sizes = boost::get<std::vector<int>>(desc.GetAttr("strides"));
+  auto window = boost::get<std::vector<int>>(desc.GetAttr("kernels"));
+
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "paddings", pad_sizes);
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "strides", stride_sizes);
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "window_size", window);
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "dilations", unit_dilations);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
--- a/paddle/fluid/inference/anakin/convert/im2sequence.h
+++ b/paddle/fluid/inference/anakin/convert/im2sequence.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// AnakinOpConverter implementation for im2sequence. Only the call operator
+// is overridden; its definition lives out of line.
+class Im2SequenceConverter : public AnakinOpConverter {
+ public:
+  Im2SequenceConverter() = default;
+  ~Im2SequenceConverter() override = default;
+
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/op_converter.h
+++ b/paddle/fluid/inference/anakin/convert/op_converter.h
@@ -14,15 +14,16 @@

 #pragma once

+#include <map>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
+#include <vector>
 #include "framework/core/types.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/inference/anakin/convert/registrar.h"
 #include "paddle/fluid/inference/anakin/engine.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "saber/saber_types.h"
@@ -46,19 +47,14 @@ class AnakinOpConverter {
                 bool test_mode = false) {
    framework::OpDesc op_desc(op, nullptr);
    std::string op_type = op_desc.Type();
-    std::shared_ptr<AnakinOpConverter> it{nullptr};
-
-    if (op_type == "mul") {
-      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
-      std::string Y = op_desc.Input("Y")[0];
-      std::cout << Y << parameters.count(Y) << std::endl;
-      if (parameters.count(Y)) {
-        it = OpRegister::instance()->Get("fc");
-      }
-    }
+    AnakinOpConverter *it = nullptr;
+
+    if (op_type == "reshape2") op_type = "reshape";
+    if (op_type == "transpose2") op_type = "transpose";
+    if (op_type == "flatten2") op_type = "flatten";

    if (!it) {
-      it = OpRegister::instance()->Get(op_type);
+      it = Registry<AnakinOpConverter>::Global().Lookup(op_type);
    }
    PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
    it->SetEngine(engine);
@@ -74,6 +70,63 @@ class AnakinOpConverter {
      ConvertOp(op, parameters, scope, engine);
    }
  }
+
+  // Converts `block_desc` into `engine` and finalizes the engine (freeze, set
+  // input shapes, optimize, init). `scope` must already hold the parameter
+  // vars; `outputs` is not used here.
+  void ConvertBlockToAnakinEngine(
+      framework::BlockDesc *block_desc, framework::Scope *scope,
+      const std::vector<std::string> &inputs,
+      const std::unordered_set<std::string> &parameters,
+      const std::vector<std::string> &outputs, AnakinNvEngine *engine) {
+    framework::proto::BlockDesc *block_proto = block_desc->Proto();
+    ConvertBlock(*block_proto, parameters, *scope, engine);
+
+    engine->Freeze();
+    // A positive max batch size is required; it becomes dim 0 of each input.
+    int max_batch_size = engine->GetMaxBatchSize();
+    PADDLE_ENFORCE(max_batch_size > 0,
+                   "the max_batch_size setted from config->EnableAnakinEngine "
+                   "must largger than 0");
+    // If the user does not specify this variable, we use the input shape from
+    // the block_desc.
+    auto max_input_shape = engine->GetMaxInputShape();
+    std::map<std::string, std::vector<int>> temp_max_input_shape;
+
+    for (auto &input : inputs) {
+      if (parameters.count(input)) continue;  // Skip parameter vars.
+      std::vector<int> input_shape(4);
+      input_shape[0] = max_batch_size;
+      if (max_input_shape.count(input)) {
+        PADDLE_ENFORCE(max_input_shape[input].size() == 4,
+                       "the dimensions of  max_input_shape setted from "
+                       "config->EnableAnakinEngine must be 4");
+        for (int i = 1; i < 4; i++) {
+          input_shape[i] = max_input_shape[input][i];
+        }
+      } else {
+        auto *var = block_desc->FindVar(input);
+        PADDLE_ENFORCE(var, "no variable called %s", input);
+
+        auto var_shape = var->GetShape();
+        VLOG(3) << "anakin engine input: " << input;
+        PADDLE_ENFORCE(var_shape.size() == 4);
+        for (size_t i = 1; i < var_shape.size(); i++) {
+          input_shape[i] = static_cast<int>(var_shape[i]);
+        }
+      }
+      temp_max_input_shape[input] = input_shape;
+      engine->SetInputShape(input, input_shape);
+      engine->Graph()->RegistVar(input);  // For share from data.
+    }
+    engine->SetMaxInputShape(temp_max_input_shape);
+    engine->Optimize();
+
+    // For anakin share with fluid tensor.
+    engine->AllocTmpMem();
+    engine->InitGraph();
+  }
+
  void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
  virtual ~AnakinOpConverter() {}

@@ -91,22 +144,23 @@ class AnakinOpConverter {
 }  // namespace inference
 }  // namespace paddle

-#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)                \
-  struct anakin_##op_type__##_converter                                     \
-      : public ::paddle::framework::Registrar {                             \
-    anakin_##op_type__##_converter() {                                      \
-      ::paddle::inference::                                                 \
-          Registry<paddle::inference::anakin::AnakinOpConverter>::Register< \
-              ::paddle::inference::anakin::Converter__>(#op_type__);        \
-    }                                                                       \
-  };                                                                        \
-  anakin_##op_type__##_converter anakin_##op_type__##_converter__;          \
-  int TouchConverterRegister_anakin_##op_type__() {                         \
-    anakin_##op_type__##_converter__.Touch();                               \
-    return 0;                                                               \
+// Registers Converter__ as the Anakin converter for the fluid op op_type__.
+//
+// Expands to:
+//   * a struct anakin_<op>_converter deriving from framework::Registrar whose
+//     constructor logs the registration and registers Converter__ under the
+//     name #op_type__ in the global Registry<AnakinOpConverter>;
+//   * one global instance of that struct, so the registration runs when the
+//     instance is constructed;
+//   * a function TouchConverterRegister_anakin_<op>() that calls Touch() on
+//     that instance and returns 0 (it is referenced by USE_ANAKIN_CONVERTER).
+#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)               \
+  struct anakin_##op_type__##_converter                                    \
+      : public ::paddle::framework::Registrar {                            \
+    anakin_##op_type__##_converter() {                                     \
+      LOG(INFO) << "register convert " << #op_type__;                      \
+      ::paddle::inference::Registry<                                       \
+          ::paddle::inference::anakin::AnakinOpConverter>::Global()        \
+          .Register<::paddle::inference::anakin::Converter__>(#op_type__); \
+    }                                                                      \
+  };                                                                       \
+  anakin_##op_type__##_converter anakin_##op_type__##_converter__;         \
+  int TouchConverterRegister_anakin_##op_type__() {                        \
+    anakin_##op_type__##_converter__.Touch();                              \
+    return 0;                                                              \
  }

-#define USE_ANAKIN_CONVERTER(op_type__)                                    \
-  extern int TouchConverterRegister_anakin_##op_type__();                  \
-  static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
+// Declares TouchConverterRegister_anakin_<op>() (defined by
+// REGISTER_ANAKIN_OP_CONVERTER) and calls it to initialize a global int.
+// NOTE(review): presumably this reference keeps the registering translation
+// unit from being dropped by the linker - confirm. The variable has external
+// linkage, so using this macro for the same op in two translation units of
+// one binary would define the symbol twice.
+#define USE_ANAKIN_CONVERTER(op_type__)                             \
+  extern int TouchConverterRegister_anakin_##op_type__();           \
+  int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
      TouchConverterRegister_anakin_##op_type__();
--- a/paddle/fluid/inference/anakin/convert/pool2d.cc
+++ b/paddle/fluid/inference/anakin/convert/pool2d.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/pool2d.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid pool2d op into an Anakin "Pooling" op added to engine_.
+//
+// op        - proto description of the pool2d op; it must carry exactly one
+//             "X" input and exactly one "Out" output.
+// scope     - not used by this converter.
+// test_mode - not used by this converter.
+//
+// Raises through PADDLE_ENFORCE_EQ / PADDLE_THROW when the input/output count
+// is wrong or when pooling_type is neither "max" nor "avg".
+void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
+                                   const framework::Scope &scope,
+                                   bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+  auto x_name = op_desc.Input("X").front();
+  auto y_name = op_desc.Output("Out").front();
+  // The Anakin op is named "<fluid op type>:<output variable>".
+  auto op_name = op_desc.Type() + ":" + y_name;
+
+  bool global_pooling = boost::get<bool>(op_desc.GetAttr("global_pooling"));
+  std::string pool_type =
+      boost::get<std::string>(op_desc.GetAttr("pooling_type"));
+  std::vector<int> ksize =
+      boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
+  std::vector<int> strides =
+      boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
+  std::vector<int> paddings =
+      boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
+  bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
+
+  std::string anakin_pool_type;
+  if (pool_type == "max") {
+    anakin_pool_type = "MAX";
+  } else if (pool_type == "avg") {
+    // Padded average pooling is mapped to "AVGEXC", unpadded to "AVG".
+    // NOTE(review): "AVGEXC" presumably excludes padded elements from the
+    // average - confirm against the Anakin Pooling documentation.
+    // any_of avoids indexing past the end of a short paddings attribute.
+    const bool has_padding =
+        std::any_of(paddings.begin(), paddings.end(),
+                    [](int pad) { return pad != 0; });
+    anakin_pool_type = has_padding ? "AVGEXC" : "AVG";
+  } else {
+    PADDLE_THROW("Anakin unsupported pooling type!");
+  }
+
+  engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
+  engine_->AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
+  engine_->AddOpAttr<PTuple<int>>(op_name, "strides", strides);
+  engine_->AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
+  engine_->AddOpAttr(op_name, "method", anakin_pool_type);
+  engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
+  // Anakin's flag is the inverse of fluid's ceil_mode.
+  engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
--- a/paddle/fluid/inference/anakin/convert/pool2d.h
+++ b/paddle/fluid/inference/anakin/convert/pool2d.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid pool2d op; the conversion logic lives in
+// operator() (see pool2d.cc).
+class Pool2dOpConverter : public AnakinOpConverter {
+ public:
+  Pool2dOpConverter() = default;
+  ~Pool2dOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/relu.cc
+++ b/paddle/fluid/inference/anakin/convert/relu.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/relu.h"
+#include <algorithm>
+#include <map>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Maps a fluid relu op (one "X" input, one "Out" output) onto an Anakin
+// "ReLU" op whose "alpha" attribute is fixed to 0. scope and test_mode are
+// not used.
+void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
+                                 const framework::Scope &scope,
+                                 bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+
+  auto x_vars = desc.Input("X");
+  PADDLE_ENFORCE_EQ(x_vars.size(), 1);
+  auto out_vars = desc.Output("Out");
+  PADDLE_ENFORCE_EQ(out_vars.size(), 1);
+
+  // Anakin op name: "<fluid op type>:<output variable>".
+  auto anakin_op = desc.Type() + ":" + out_vars.front();
+  auto src = x_vars.front();
+  auto dst = out_vars.front();
+
+  engine_->AddOp(anakin_op, "ReLU", {src}, {dst});
+  engine_->AddOpAttr(anakin_op, "alpha", 0);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
--- a/paddle/fluid/inference/anakin/convert/relu.h
+++ b/paddle/fluid/inference/anakin/convert/relu.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid relu op; the conversion logic lives in
+// operator() (see relu.cc).
+class ReluOpConverter : public AnakinOpConverter {
+ public:
+  ReluOpConverter() = default;
+  ~ReluOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/reshape.cc
+++ b/paddle/fluid/inference/anakin/convert/reshape.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/reshape.h"
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Maps a fluid reshape op (one "X" input, one "Out" output) onto an Anakin
+// "Reshape" op. The target shape is padded with trailing 1s up to four
+// dimensions before it is passed as the "dims" attribute. scope and
+// test_mode are not used.
+void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op,
+                                    const framework::Scope &scope,
+                                    bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+
+  auto x_vars = desc.Input("X");
+  PADDLE_ENFORCE_EQ(x_vars.size(), 1UL);
+  auto out_vars = desc.Output("Out");
+  PADDLE_ENFORCE_EQ(out_vars.size(), 1UL);
+
+  auto src = x_vars.front();
+  auto dst = out_vars.front();
+
+  // Anakin op name: "<fluid op type>:<output variable>".
+  auto anakin_op = desc.Type() + ":" + out_vars.front();
+  engine_->AddOp(anakin_op, "Reshape", {src}, {dst});
+
+  auto dims = boost::get<std::vector<int>>(desc.GetAttr("shape"));
+  while (dims.size() < 4) {
+    dims.push_back(1);
+  }
+  engine_->AddOpAttr<PTuple<int>>(anakin_op, "dims", dims);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter);
--- a/paddle/fluid/inference/anakin/convert/reshape.h
+++ b/paddle/fluid/inference/anakin/convert/reshape.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid reshape op; the conversion logic lives in
+// operator() (see reshape.cc).
+class ReshapeOpConverter : public AnakinOpConverter {
+ public:
+  ReshapeOpConverter() = default;
+  ~ReshapeOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/scale.cc
+++ b/paddle/fluid/inference/anakin/convert/scale.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/scale.h"
+#include <algorithm>
+#include <map>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid scale op into an Anakin "Power" op with power == 1, i.e.
+// the fluid "scale" attribute becomes the Power "scale" and the fluid "bias"
+// attribute becomes the Power "shift".
+//
+// op        - proto description of the scale op; it must carry exactly one
+//             "X" input and exactly one "Out" output, and its
+//             bias_after_scale attribute must be true.
+// scope     - not used by this converter.
+// test_mode - not used by this converter.
+void ScaleOpConverter::operator()(const framework::proto::OpDesc &op,
+                                  const framework::Scope &scope,
+                                  bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+  // The Anakin op is named "<fluid op type>:<output variable>".
+  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
+
+  auto input_name = op_desc.Input("X").front();
+  auto output_name = op_desc.Output("Out").front();
+  float scale = boost::get<float>(op_desc.GetAttr("scale"));
+  float bias = boost::get<float>(op_desc.GetAttr("bias"));
+  // The attribute is a bool; keep it as one instead of widening it to float.
+  bool bias_after_scale =
+      boost::get<bool>(op_desc.GetAttr("bias_after_scale"));
+  PADDLE_ENFORCE(bias_after_scale,
+                 "The anakin scale layer only support bias after scale now.");
+
+  engine_->AddOp(op_name, "Power", {input_name}, {output_name});
+  engine_->AddOpAttr(op_name, "shift", bias);
+  engine_->AddOpAttr(op_name, "scale", scale);
+  engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter);
--- a/paddle/fluid/inference/anakin/convert/scale.h
+++ b/paddle/fluid/inference/anakin/convert/scale.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid scale op; the conversion logic lives in
+// operator() (see scale.cc).
+class ScaleOpConverter : public AnakinOpConverter {
+ public:
+  ScaleOpConverter() = default;
+  ~ScaleOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/softmax.cc
+++ b/paddle/fluid/inference/anakin/convert/softmax.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/softmax.h"
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid softmax op into an Anakin "Softmax" op added to engine_.
+//
+// op        - proto description of the softmax op; it must carry exactly one
+//             "X" input and exactly one "Out" output.
+// scope     - not used by this converter.
+// test_mode - not used by this converter.
+void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
+                                    const framework::Scope &scope,
+                                    bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
+  // Guard the .front() calls below against an empty output list.
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
+
+  auto input = op_desc.Input("X").front();
+  auto output = op_desc.Output("Out").front();
+  // The Anakin op is named "<fluid op type>:<output variable>".
+  auto op_name = op_desc.Type() + ":" + output;
+  engine_->AddOp(op_name, "Softmax", {input}, {output});
+  // NOTE(review): the axis is hard-coded to 2 - confirm that this matches
+  // the layout of every input this converter is expected to handle.
+  engine_->AddOpAttr(op_name, "axis", 2);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter);
--- a/paddle/fluid/inference/anakin/convert/softmax.h
+++ b/paddle/fluid/inference/anakin/convert/softmax.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid softmax op; the conversion logic lives in
+// operator() (see softmax.cc).
+class SoftMaxOpConverter : public AnakinOpConverter {
+ public:
+  SoftMaxOpConverter() = default;
+  ~SoftMaxOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/split.cc
+++ b/paddle/fluid/inference/anakin/convert/split.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/split.h"
+#include <algorithm>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converts a fluid split op into an Anakin "Slice" op added to engine_.
+//
+// The fluid "sections" attribute (length of every output along "axis") is
+// turned into Anakin's "slice_point" attribute: the running sums of all
+// section lengths except the last one.
+//
+// op        - proto description of the split op; it must carry exactly one
+//             "X" input, at least two entries in "sections", and one "Out"
+//             output per section.
+// scope     - not used by this converter.
+// test_mode - not used by this converter.
+void SplitOpConverter::operator()(const framework::proto::OpDesc &op,
+                                  const framework::Scope &scope,
+                                  bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
+  auto input_name = op_desc.Input("X").front();
+
+  auto y_names = op_desc.Output("Out");
+  PADDLE_ENFORCE(!y_names.empty(),
+                 "anakin split op converter: the op should have outputs");
+  // The Anakin op is named "<fluid op type>:<first output variable>".
+  auto op_name = op_desc.Type() + ":" + y_names.front();
+  int axis = boost::get<int>(op_desc.GetAttr("axis"));
+
+  std::vector<int> output_lengths =
+      boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
+
+  int split_num = output_lengths.size();
+  PADDLE_ENFORCE(split_num > 1,
+                 "anakin split op converter: the split num should > 1");
+  PADDLE_ENFORCE(y_names.size() == output_lengths.size(),
+                 "anakin split op converter: the number of outputs should "
+                 "be equal to the number of sections");
+
+  int num_sum = 0;
+  std::vector<int> slice_point;
+  slice_point.reserve(split_num - 1);
+  for (int i = 0; i < split_num - 1; i++) {
+    num_sum += output_lengths[i];
+    slice_point.push_back(num_sum);
+  }
+  engine_->AddOp(op_name, "Slice", {input_name}, y_names);
+  engine_->AddOpAttr(op_name, "axis", axis);
+  engine_->AddOpAttr<PTuple<int>>(op_name, "slice_point", slice_point);
+  // slice_dim is useless in anakin
+  engine_->AddOpAttr(op_name, "slice_dim", 4);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter);
--- a/paddle/fluid/inference/anakin/convert/split.h
+++ b/paddle/fluid/inference/anakin/convert/split.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid split op; the conversion logic lives in
+// operator() (see split.cc).
+class SplitOpConverter : public AnakinOpConverter {
+ public:
+  SplitOpConverter() = default;
+  ~SplitOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/sum.cc
+++ b/paddle/fluid/inference/anakin/convert/sum.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/sum.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Maps a fluid sum op with exactly two "X" inputs and one "Out" output onto
+// an Anakin "Eltwise" op of type "Add" with coefficients {1, 1}. scope and
+// test_mode are not used.
+void SumOpConverter::operator()(const framework::proto::OpDesc &op,
+                                const framework::Scope &scope, bool test_mode) {
+  framework::OpDesc desc(op, nullptr);
+
+  auto addends = desc.Input("X");
+  PADDLE_ENFORCE_EQ(addends.size(), 2);
+  auto out_vars = desc.Output("Out");
+  PADDLE_ENFORCE_EQ(out_vars.size(), 1);
+
+  auto result = out_vars.front();
+  // Anakin op name: "<fluid op type>:<output variable>".
+  auto anakin_op = desc.Type() + ":" + out_vars.front();
+
+  std::vector<float> weights = {1, 1};
+  std::string mode = "Add";
+
+  engine_->AddOp(anakin_op, "Eltwise", addends, {result});
+  engine_->AddOpAttr<PTuple<float>>(anakin_op, "coeff", weights);
+  engine_->AddOpAttr<std::string>(anakin_op, "type", mode);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter);
--- a/paddle/fluid/inference/anakin/convert/sum.h
+++ b/paddle/fluid/inference/anakin/convert/sum.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the fluid sum op; the conversion logic lives in
+// operator() (see sum.cc).
+class SumOpConverter : public AnakinOpConverter {
+ public:
+  SumOpConverter() = default;
+  ~SumOpConverter() override = default;
+
+  // Adds the Anakin op(s) equivalent to `op` to the attached engine.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/test_activation_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/activation.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Builds a single activation op of type `op_type` ("act-X" -> "act-Out",
+// both declared with shape {10, 6, 1, 1}) and runs it through
+// AnakinConvertValidation with Execute(5).
+static void test_activation_op(const std::string &op_type) {
+  // The converter must have been registered for this op type.
+  auto *registered = Registry<AnakinOpConverter>::Global().Lookup(op_type);
+  PADDLE_ENFORCE(registered != nullptr);
+
+  std::unordered_set<std::string> params;
+  framework::Scope local_scope;
+  AnakinConvertValidation checker(params, &local_scope);
+  checker.DeclInputVar("act-X", {10, 6, 1, 1});
+  checker.DeclOutputVar("act-Out", {10, 6, 1, 1});
+
+  // Describe the op under test.
+  framework::OpDesc op_desc;
+  op_desc.SetType(op_type);
+  op_desc.SetInput("X", {"act-X"});
+  op_desc.SetOutput("Out", {"act-Out"});
+
+  LOG(INFO) << "set OP";
+  checker.SetOp(*op_desc.Proto());
+
+  LOG(INFO) << "execute";
+  checker.Execute(5);
+}
+
+// One test case per supported activation type.
+TEST(sigm_op, test) { test_activation_op("sigmoid"); }
+TEST(tanh_op, test) { test_activation_op("tanh"); }
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(sigmoid);
+USE_OP(tanh);
+USE_ANAKIN_CONVERTER(sigmoid);
+USE_ANAKIN_CONVERTER(tanh);
--- a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Runs a batch_norm op (is_test == true, epsilon == 1e-5) on a {1, 2, 5, 5}
+// input through AnakinConvertValidation. The saved/running statistics
+// outputs are passed to Execute as outputs to neglect.
+TEST(batch_norm_op, test) {
+  std::unordered_set<std::string> params(
+      {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
+       "batch_norm_variance"});
+  framework::Scope local_scope;
+  AnakinConvertValidation checker(params, &local_scope);
+
+  // Shape shared by every per-channel parameter and statistic.
+  std::vector<int> channel_shape{2};
+
+  checker.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
+  checker.DeclParamVar("batch_norm_scale", channel_shape);
+  checker.DeclParamVar("batch_norm_bias", channel_shape);
+  checker.DeclParamVar("batch_norm_mean", channel_shape);
+  checker.DeclParamVar("batch_norm_variance", channel_shape);
+  checker.DeclOutputVar("batch_norm_Y", {1, 2, 5, 5});
+  checker.DeclOutputVar("batch_norm_save_mean", channel_shape);
+  checker.DeclOutputVar("batch_norm_save_variance", channel_shape);
+
+  // Describe the op under test.
+  framework::OpDesc op_desc;
+  op_desc.SetType("batch_norm");
+
+  op_desc.SetInput("X", {"batch_norm_X"});
+  op_desc.SetInput("Scale", {"batch_norm_scale"});
+  op_desc.SetInput("Bias", {"batch_norm_bias"});
+  op_desc.SetInput("Mean", {"batch_norm_mean"});
+  op_desc.SetInput("Variance", {"batch_norm_variance"});
+
+  op_desc.SetOutput("Y", {"batch_norm_Y"});
+  op_desc.SetOutput("MeanOut", {"batch_norm_mean"});
+  op_desc.SetOutput("VarianceOut", {"batch_norm_variance"});
+  op_desc.SetOutput("SavedMean", {"batch_norm_save_mean"});
+  op_desc.SetOutput("SavedVariance", {"batch_norm_save_variance"});
+
+  float epsilon = 1e-5f;
+  bool inference_only = true;
+  op_desc.SetAttr("epsilon", epsilon);
+  op_desc.SetAttr("is_test", inference_only);
+
+  checker.SetOp(*op_desc.Proto());
+
+  std::unordered_set<std::string> skipped_outputs = {
+      "batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean",
+      "batch_norm_variance"};
+  checker.Execute(1, skipped_outputs);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+USE_OP(batch_norm);
+USE_ANAKIN_CONVERTER(batch_norm);
--- a/paddle/fluid/inference/anakin/convert/test_concat_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_concat_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/concat.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `concat` converter on three 4-D inputs joined along axis 1.
+TEST(concat_op, test) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names({""});
+  AnakinConvertValidation validator(param_names, &scope);
+
+  // Extents along axis 1 are 2, 3 and 1; the output is declared with 6.
+  validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
+  validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
+  validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
+  validator.DeclOutputVar("concat_out", {1, 6, 1, 1});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("concat");
+  op.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
+  op.SetOutput("Out", {"concat_out"});
+  const int concat_axis = 1;
+  op.SetAttr("axis", concat_axis);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+// Validates the `concat` converter on three 2-D inputs joined along axis 0.
+TEST(concat_op, test2) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names({""});
+  AnakinConvertValidation validator(param_names, &scope);
+
+  // Extents along axis 0 are 1, 3 and 2; the output is declared with 6.
+  validator.DeclInputVar("concat_x1", {1, 4});
+  validator.DeclInputVar("concat_x2", {3, 4});
+  validator.DeclInputVar("concat_x3", {2, 4});
+  validator.DeclOutputVar("concat_out", {6, 4});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("concat");
+  op.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
+  op.SetOutput("Out", {"concat_out"});
+  const int concat_axis = 0;
+  op.SetAttr("axis", concat_axis);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+USE_OP(concat);
+USE_ANAKIN_CONVERTER(concat);
--- a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/conv2d.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Checks that a `conv2d` converter is registered, then validates a convolution
+// whose filter is declared with shape {4, 3, 1, 1}.
+TEST(conv2d_op, test) {
+  auto* registered = Registry<AnakinOpConverter>::Global().Lookup("conv2d");
+  ASSERT_TRUE(registered != nullptr);
+
+  framework::Scope scope;
+  // The filter is the only variable listed as a parameter.
+  std::unordered_set<std::string> param_names({"conv2d-Y"});
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
+  validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
+  validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("conv2d");
+  op.SetInput("Input", {"conv2d-X"});
+  op.SetInput("Filter", {"conv2d-Y"});
+  op.SetOutput("Output", {"conv2d-Out"});
+
+  // Unit strides and dilations, zero padding, one group.
+  op.SetAttr("strides", std::vector<int>({1, 1}));
+  op.SetAttr("paddings", std::vector<int>({0, 0}));
+  op.SetAttr("dilations", std::vector<int>({1, 1}));
+  const int group_count = 1;
+  op.SetAttr("groups", group_count);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(3);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(conv2d);
+USE_ANAKIN_CONVERTER(conv2d);
--- a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/dropout.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `dropout` converter with `is_test` set; the "mask" output is
+// handed to Execute in the neglected-output set.
+TEST(dropout_op, native) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("x", {1, 1, 2, 2});
+  validator.DeclOutputVar("out", {1, 1, 2, 2});
+  validator.DeclOutputVar("mask", {1, 1, 2, 2});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("dropout");
+  op.SetInput("X", {"x"});
+  op.SetOutput("Out", {"out"});
+  op.SetOutput("Mask", {"mask"});
+
+  float drop_prob = 0.5;
+  op.SetAttr("dropout_prob", drop_prob);
+  op.SetAttr("is_test", true);
+
+  validator.SetOp(*op.Proto());
+
+  std::unordered_set<std::string> skipped_outputs = {"mask"};
+  validator.Execute(1, skipped_outputs);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(dropout);
+USE_ANAKIN_CONVERTER(dropout);
--- a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/elementwise.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Builds an op of type `op_type` taking inputs "x" and "y" (both declared as
+// {1, 1, 2, 2}) with axis = -1, and runs it through the validation harness.
+static void test_elementwise_op(const std::string &op_type) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("x", {1, 1, 2, 2});
+  validator.DeclInputVar("y", {1, 1, 2, 2});
+  validator.DeclOutputVar("out", {1, 1, 2, 2});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType(op_type);
+  op.SetInput("X", {"x"});
+  op.SetInput("Y", {"y"});
+  op.SetOutput("Out", {"out"});
+  const int broadcast_axis = -1;
+  op.SetAttr("axis", broadcast_axis);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+// Elementwise addition through the shared helper.
+TEST(elementwise_op, native_add) {
+  test_elementwise_op("elementwise_add");
+}
+
+// Elementwise multiplication through the shared helper.
+TEST(elementwise_op, native_mul) {
+  test_elementwise_op("elementwise_mul");
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(elementwise_add);
+USE_ANAKIN_CONVERTER(elementwise_add);
+USE_OP(elementwise_mul);
+USE_ANAKIN_CONVERTER(elementwise_mul);
--- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <gtest/gtest.h>
-#include "paddle/fluid/inference/anakin/convert/fc.h"
 #include "paddle/fluid/inference/anakin/convert/op_converter.h"
 #include "paddle/fluid/inference/anakin/convert/ut_helper.h"

@@ -22,17 +21,15 @@ namespace inference {
 namespace anakin {

 TEST(fc_op, test) {
-  auto fc_converter = OpRegister::instance()->Get("fc");
-  ASSERT_TRUE(fc_converter != nullptr);
-  // Registrar<FcOpConverter> register_fc("fc");
-  // auto fc = std::make_shared<FcOpConverter>();
+  auto* fc_converter = Registry<AnakinOpConverter>::Global().Lookup("fc");
+  ASSERT_TRUE(fc_converter);

  std::unordered_set<std::string> parameters({"mul_y"});
  framework::Scope scope;
-  AnakinConvertValidation validator(parameters, scope);
-  validator.DeclInputVar("mul_x", {1, 1, 1, 1});
-  validator.DeclParamVar("mul_y", {1, 2});
-  validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
+  AnakinConvertValidation validator(parameters, &scope);
+  validator.DeclInputVar("mul_x", {1, 1, 2, 2});
+  validator.DeclParamVar("mul_y", {4, 2});
+  validator.DeclOutputVar("mul_out", {1, 2});

  // Prepare Op description
  framework::OpDesc desc;
@@ -40,8 +37,6 @@ TEST(fc_op, test) {
  desc.SetInput("X", {"mul_x"});
  desc.SetInput("Y", {"mul_y"});
  desc.SetOutput("Out", {"mul_out"});
-  int num_flatten_dims = 3;
-  desc.SetAttr("x_num_col_dims", num_flatten_dims);
  validator.SetOp(*desc.Proto());

  validator.Execute(10);
@@ -52,3 +47,4 @@ TEST(fc_op, test) {
 }  // namespace paddle

 USE_OP(mul);
+USE_ANAKIN_CONVERTER(fc);
--- a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Checks that a `flatten` converter is registered, then validates flattening a
+// {3, 10, 10, 4} input with axis = 1 into a declared {3, 400, 1, 1} output.
+TEST(flatten_op, test) {
+  auto *registered = Registry<AnakinOpConverter>::Global().Lookup("flatten");
+  ASSERT_TRUE(registered);
+
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
+  validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("flatten");
+  op.SetInput("X", {"flatten-X"});
+  op.SetOutput("Out", {"flatten-Out"});
+  op.SetAttr("axis", 1);
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(5);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(reshape);
+USE_OP_ITSELF(flatten);
+USE_ANAKIN_CONVERTER(flatten);
--- a/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `im2sequence` converter on a {1, 1, 2, 2} input.
+// NOTE(review): the 6x1 kernel is larger than the 2x2 input declared below --
+// confirm these dimensions are intentional.
+TEST(im2sequence_op, native) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+
+  std::vector<int> kernels = {6, 1};
+  std::vector<int> strides = {1, 1};
+  std::vector<int> paddings = {0, 0, 0, 0};
+
+  // The second output extent is 1 * kernel[0] * kernel[1].
+  const int out_cols = 1 * kernels[0] * kernels[1];
+  validator.DeclInputVar("x", {1, 1, 2, 2});
+  validator.DeclOutputVar("out", {1, out_cols});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("im2sequence");
+  op.SetInput("X", {"x"});
+  op.SetOutput("Out", {"out"});
+  op.SetAttr("kernels", kernels);
+  op.SetAttr("strides", strides);
+  op.SetAttr("paddings", paddings);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(im2sequence);
+USE_ANAKIN_CONVERTER(im2sequence);
--- a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `pool2d` converter with a 2x2 window, stride 2 and no padding
+// on a {1, 3, 6, 7} input.
+//
+//   global_pooling - forwarded to the "global_pooling" attribute; selects a
+//                    {1, 3, 1, 1} expected output.
+//   ceil_mode      - forwarded to the "ceil_mode" attribute; selects
+//                    {1, 3, 3, 4} instead of {1, 3, 3, 3} when not pooling
+//                    globally.
+//   pool_type      - forwarded to the "pooling_type" attribute.
+void test_pool2d(bool global_pooling, bool ceil_mode,
+                 std::string pool_type = "max") {
+  auto* registered = Registry<AnakinOpConverter>::Global().Lookup("pool2d");
+  ASSERT_TRUE(registered);
+
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+
+  // Pick the expected output shape for this flag combination.
+  std::vector<int> expected_shape;
+  if (global_pooling) {
+    expected_shape = {1, 3, 1, 1};
+  } else if (ceil_mode) {
+    expected_shape = {1, 3, 3, 4};
+  } else {
+    expected_shape = {1, 3, 3, 3};
+  }
+  validator.DeclInputVar("pool2d_x", {1, 3, 6, 7});
+  validator.DeclOutputVar("pool2d_out", expected_shape);
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("pool2d");
+  op.SetInput("X", {"pool2d_x"});
+  op.SetOutput("Out", {"pool2d_out"});
+
+  op.SetAttr("pooling_type", pool_type);
+  op.SetAttr("ksize", std::vector<int>({2, 2}));
+  op.SetAttr("strides", std::vector<int>({2, 2}));
+  op.SetAttr("paddings", std::vector<int>({0, 0}));
+  op.SetAttr("global_pooling", global_pooling);
+  op.SetAttr("ceil_mode", ceil_mode);
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(1);
+}
+
+// Validates the `pool2d` converter with a 3x3 window, stride 1 and padding 1
+// on a {1, 1, 17, 17} input whose output is declared with the same shape.
+//
+//   global_pooling - forwarded to the "global_pooling" attribute. The declared
+//                    output shape is only meaningful when this is false.
+//   ceil_mode      - forwarded to the "ceil_mode" attribute. It was previously
+//                    ignored in favour of a hard-coded `true`.
+//   pool_type      - forwarded to the "pooling_type" attribute.
+void test_pool2d2(bool global_pooling, bool ceil_mode,
+                  std::string pool_type = "max") {
+  auto* pool2d_converter =
+      Registry<AnakinOpConverter>::Global().Lookup("pool2d");
+  ASSERT_TRUE(pool2d_converter);
+
+  framework::Scope scope;
+  std::unordered_set<std::string> parameters;
+  AnakinConvertValidation validator(parameters, &scope);
+
+  // Both shapes are 4-D with leading extents of 1.
+  validator.DeclInputVar("pool2d_x", {1, 1, 17, 17});
+  validator.DeclOutputVar("pool2d_out", {1, 1, 17, 17});
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("pool2d");
+  desc.SetInput("X", {"pool2d_x"});
+  desc.SetOutput("Out", {"pool2d_out"});
+
+  std::vector<int> ksize({3, 3});
+  std::vector<int> strides({1, 1});
+  std::vector<int> paddings({1, 1});
+
+  desc.SetAttr("pooling_type", pool_type);
+  desc.SetAttr("ksize", ksize);
+  desc.SetAttr("strides", strides);
+  desc.SetAttr("paddings", paddings);
+  desc.SetAttr("global_pooling", global_pooling);
+  // Honour the caller's flag. With stride 1 the output extent presumably does
+  // not depend on rounding, so the declared shape should hold for either
+  // value -- TODO confirm against the pool2d shape formula.
+  desc.SetAttr("ceil_mode", ceil_mode);
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*desc.Proto());
+  LOG(INFO) << "execute";
+
+  validator.Execute(1);
+}
+
+// Default ("max") pooling, no global pooling, ceil_mode off.
+TEST(Pool2dOpConverter, normal) {
+  test_pool2d(false, false);
+}
+
+// Global pooling with the default pooling type.
+TEST(Pool2dOpConverter, test_global_pooling) {
+  test_pool2d(true, false);
+}
+
+// ceil_mode on with the default ("max") pooling type.
+TEST(Pool2dOpConverter, max_ceil_test) {
+  test_pool2d(false, true);
+}
+
+// ceil_mode on with "avg" pooling.
+TEST(Pool2dOpConverter, avg_ceil_test) {
+  test_pool2d(false, true, "avg");
+}
+
+// ceil_mode on with "avg" pooling on the 17x17 padded configuration.
+TEST(Pool2dOpConverter, avg_ceil_test2) {
+  test_pool2d2(false, true, "avg");
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(pool2d);
+USE_ANAKIN_CONVERTER(pool2d);
--- a/paddle/fluid/inference/anakin/convert/test_relu_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_relu_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/relu.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the converter registered for the activation op `op_type` on a
+// {10, 6, 1, 1} input whose output is declared with the same shape.
+//
+// The registry lookup is checked with a gtest assertion, like the other
+// converter tests, so a missing converter is reported as a test failure
+// instead of an exception thrown by PADDLE_ENFORCE.
+static void test_activation_op(const std::string &op_type) {
+  auto *converter = Registry<AnakinOpConverter>::Global().Lookup(op_type);
+  ASSERT_TRUE(converter != nullptr);
+  std::unordered_set<std::string> parameters;
+  framework::Scope scope;
+  AnakinConvertValidation validator(parameters, &scope);
+  validator.DeclInputVar("act-X", {10, 6, 1, 1});
+  validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
+
+  // Activation ops here take a single input "X" and produce a single "Out".
+  framework::OpDesc desc;
+  desc.SetType(op_type);
+  desc.SetInput("X", {"act-X"});
+  desc.SetOutput("Out", {"act-Out"});
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*desc.Proto());
+  LOG(INFO) << "execute";
+
+  validator.Execute(5);
+}
+
+// Runs the activation helper for "relu".
+// NOTE(review): the suite is named `sigm_op` although the op under test is
+// relu -- confirm whether the name should be `relu_op`.
+TEST(sigm_op, test) {
+  test_activation_op("relu");
+}
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(relu);
+USE_ANAKIN_CONVERTER(relu);
--- a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Checks that a `reshape` converter is registered, then validates reshaping a
+// {1, 2, 4, 1} input to the explicit target shape {1, 8, 1, 1}.
+TEST(reshape, test) {
+  auto* registered = Registry<AnakinOpConverter>::Global().Lookup("reshape");
+  ASSERT_TRUE(registered);
+
+  std::unordered_set<std::string> param_names;
+  framework::Scope scope;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("reshape-X", {1, 2, 4, 1});
+  validator.DeclOutputVar("reshape-Out", {1, 8, 1, 1});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("reshape");
+  op.SetInput("X", {"reshape-X"});
+  op.SetOutput("Out", {"reshape-Out"});
+  op.SetAttr("shape", std::vector<int>({1, 8, 1, 1}));
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(1);
+}
+
+// Validates `reshape` on a 3-D {1, 2, 4} input with the target shape
+// {0, -1, 2}; the output is declared as {1, 4, 2}.
+// NOTE(review): presumably 0 keeps the input extent and -1 is inferred --
+// confirm against the reshape op documentation.
+TEST(reshape, test2) {
+  std::unordered_set<std::string> param_names;
+  framework::Scope scope;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("reshape-X", {1, 2, 4});
+  validator.DeclOutputVar("reshape-Out", {1, 4, 2});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("reshape");
+  op.SetInput("X", {"reshape-X"});
+  op.SetOutput("Out", {"reshape-Out"});
+  op.SetAttr("shape", std::vector<int>({0, -1, 2}));
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(reshape);
+USE_ANAKIN_CONVERTER(reshape);
--- a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Checks that a `softmax` converter is registered, then validates it on a
+// {1, 10, 2} input whose output is declared with the same shape.
+TEST(softmax, test) {
+  auto* registered = Registry<AnakinOpConverter>::Global().Lookup("softmax");
+  ASSERT_TRUE(registered);
+
+  std::unordered_set<std::string> param_names;
+  framework::Scope scope;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("softmax-X", {1, 10, 2});
+  validator.DeclOutputVar("softmax-Out", {1, 10, 2});
+
+  // Describe the op under test; no attributes are set.
+  framework::OpDesc op;
+  op.SetType("softmax");
+  op.SetInput("X", {"softmax-X"});
+  op.SetOutput("Out", {"softmax-Out"});
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(softmax);
+USE_ANAKIN_CONVERTER(softmax);
--- a/paddle/fluid/inference/anakin/convert/test_split_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_split_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/split.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `split` converter along the compile-time axis `Axis`.
+//
+//   in_shape - shape declared for the single input "split_input".
+//   sections - extent of each output along `Axis`; one output variable named
+//              "split_out<i>" is declared per entry, with the input shape and
+//              `Axis` replaced by that entry.
+template <int Axis>
+void AnakinSliceTest(const std::vector<int> &in_shape,
+                     const std::vector<int> &sections) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names({""});
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("split_input", in_shape);
+
+  // Declare one output per section and remember its name for the op.
+  std::vector<std::string> out_names;
+  size_t index = 0;
+  for (int section : sections) {
+    std::vector<int> piece_shape = in_shape;
+    piece_shape[Axis] = section;
+    std::string piece_name = "split_out" + std::to_string(index);
+    validator.DeclOutputVar(piece_name, piece_shape);
+    out_names.push_back(piece_name);
+    ++index;
+  }
+
+  // Describe the op under test; "num" is 0 and "sections" carries the sizes.
+  framework::OpDesc op;
+  op.SetType("split");
+  op.SetInput("X", {"split_input"});
+  op.SetOutput("Out", out_names);
+  op.SetAttr("axis", Axis);
+  op.SetAttr("num", 0);
+  op.SetAttr("sections", sections);
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+// Split test matrix: axis in {1, 2, 3} x {equal sections {2, 2}, unequal
+// sections {2, 1}}. Every input below has a leading dimension of 1.
+// NOTE(review): the *_batch10 cases pass exactly the same shapes as their
+// *_batch1 counterparts, so they do not exercise a batch of 10 -- confirm
+// whether a larger leading dimension was intended.
+
+// leading dim = 1, axis = 1, equal sections
+TEST(split_op, test_same_shape_axis1_batch1) {
+  AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2});
+}
+// leading dim = 1, axis = 1, unequal sections
+TEST(split_op, test_different_shape_axis1_batch1) {
+  AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1});
+}
+// named batch10, but leading dim = 1; axis = 1, equal sections
+TEST(split_op, test_same_shape_axis1_batch10) {
+  AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2});
+}
+// named batch10, but leading dim = 1; axis = 1, unequal sections
+TEST(split_op, test_different_shape_axis1_batch10) {
+  AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1});
+}
+// leading dim = 1, axis = 2, equal sections
+TEST(split_op, test_same_shape_axis2_batch1) {
+  AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2});
+}
+// leading dim = 1, axis = 2, unequal sections
+TEST(split_op, test_different_shape_axis2_batch1) {
+  AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1});
+}
+// named batch10, but leading dim = 1; axis = 2, equal sections
+TEST(split_op, test_same_shape_axis2_batch10) {
+  AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2});
+}
+// named batch10, but leading dim = 1; axis = 2, unequal sections
+TEST(split_op, test_different_shape_axis2_batch10) {
+  AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1});
+}
+// leading dim = 1, axis = 3, equal sections
+TEST(split_op, test_same_shape_axis3_batch1) {
+  AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2});
+}
+// leading dim = 1, axis = 3, unequal sections
+TEST(split_op, test_different_shape_axis3_batch1) {
+  AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1});
+}
+// named batch10, but leading dim = 1; axis = 3, equal sections
+TEST(split_op, test_same_shape_axis3_batch10) {
+  AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2});
+}
+// named batch10, but leading dim = 1; axis = 3, unequal sections
+TEST(split_op, test_different_shape_axis3_batch10) {
+  AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1});
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(split);
+USE_ANAKIN_CONVERTER(split);
--- a/paddle/fluid/inference/anakin/convert/test_sum_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_sum_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/sum.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+#include "paddle/fluid/operators/sum_op.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Validates the `sum` converter on two {1, 2, 1, 2} inputs whose output is
+// declared with the same shape.
+TEST(sum, native) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
+  validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
+  validator.DeclOutputVar("sum_out", {1, 2, 1, 2});
+
+  // Describe the op under test; no attributes are set.
+  framework::OpDesc op;
+  op.SetType("sum");
+  op.SetInput("X", {"sum_x1", "sum_x2"});
+  op.SetOutput("Out", {"sum_out"});
+
+  validator.SetOp(*op.Proto());
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(sum);
+USE_ANAKIN_CONVERTER(sum);
--- a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc
+++ b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Checks that a `transpose` converter is registered, then validates permuting
+// a {2, 3, 4, 5} input with axis order {2, 0, 3, 1} into {4, 2, 5, 3}.
+TEST(transpose_op, test) {
+  auto* registered = Registry<AnakinOpConverter>::Global().Lookup("transpose");
+  ASSERT_TRUE(registered != nullptr);
+
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
+  validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("transpose");
+  op.SetInput("X", {"transpose-X"});
+  op.SetOutput("Out", {"transpose-Out"});
+  op.SetAttr("axis", std::vector<int>({2, 0, 3, 1}));
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(3);
+}
+
+// Covers an input with fewer than four dimensions: a {3, 4, 5} tensor permuted
+// with axis order {0, 2, 1} into {3, 5, 4}.
+TEST(transpose_op, test2) {
+  framework::Scope scope;
+  std::unordered_set<std::string> param_names;
+  AnakinConvertValidation validator(param_names, &scope);
+  validator.DeclInputVar("transpose-X", {3, 4, 5});
+  validator.DeclOutputVar("transpose-Out", {3, 5, 4});
+
+  // Describe the op under test.
+  framework::OpDesc op;
+  op.SetType("transpose");
+  op.SetInput("X", {"transpose-X"});
+  op.SetOutput("Out", {"transpose-Out"});
+  op.SetAttr("axis", std::vector<int>({0, 2, 1}));
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*op.Proto());
+
+  LOG(INFO) << "execute";
+  validator.Execute(1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(transpose);
+USE_ANAKIN_CONVERTER(transpose);
--- a/paddle/fluid/inference/anakin/convert/transpose.cc
+++ b/paddle/fluid/inference/anakin/convert/transpose.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/transpose.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::saber::NV;
+using anakin::saber::Shape;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Adds a "Permute" op to the engine for a Paddle `transpose` op.
+//
+// The op must have exactly one "X" input and one "Out" output. Its "axis"
+// attribute is forwarded as the "dims" attribute; when it has fewer than four
+// entries it is padded with consecutive indices starting at its original
+// length (e.g. {0, 2, 1} becomes {0, 2, 1, 3}). `scope` and `test_mode` are
+// not used here.
+void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
+                                      const framework::Scope &scope,
+                                      bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+  const auto in_name = op_desc.Input("X").front();
+  const auto out_name = op_desc.Output("Out").front();
+  // Engine op name is "<op type>:<output name>".
+  const auto layer_name = op_desc.Type() + ":" + out_name;
+  engine_->AddOp(layer_name, "Permute", {in_name}, {out_name});
+
+  auto perm = boost::get<std::vector<int>>(op_desc.GetAttr("axis"));
+  // Pad the permutation to four entries with the next unused indices.
+  for (int next = static_cast<int>(perm.size()); perm.size() < 4; ++next) {
+    perm.push_back(next);
+  }
+  engine_->AddOpAttr<PTuple<int>>(layer_name, "dims", perm);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter);
--- a/paddle/fluid/inference/anakin/convert/transpose.h
+++ b/paddle/fluid/inference/anakin/convert/transpose.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Anakin converter for the Paddle `transpose` op. The conversion logic lives
+// in the out-of-line definition of operator().
+class TransposeOpConverter : public AnakinOpConverter {
+ public:
+  TransposeOpConverter() = default;
+  virtual ~TransposeOpConverter() {}
+
+  // Converts the op described by `op`; overrides the base-class call operator.
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/convert/ut_helper.h
+++ b/paddle/fluid/inference/anakin/convert/ut_helper.h
@@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once

+#include <gtest/gtest.h>
 #include <map>
 #include <memory>
 #include <string>
@@ -24,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
 #include "paddle/fluid/inference/anakin/engine.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/utils/singleton.h"
@@ -82,7 +84,7 @@ class AnakinConvertValidation {
  AnakinConvertValidation() = delete;

  AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
-                          const framework::Scope& scope)
+                          framework::Scope* scope)
      : parameters_(parameters), scope_(scope), place_(0) {
    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
    engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
@@ -106,7 +108,7 @@ class AnakinConvertValidation {

  void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
    platform::CUDADeviceContext ctx(place_);
-    auto* x = scope_.Var(name);
+    auto* x = scope_->Var(name);
    auto* x_tensor = x->GetMutable<framework::LoDTensor>();
    x_tensor->Resize(framework::make_ddim(dim_vec));
    RandomizeTensor(x_tensor, place_, ctx);
@@ -118,15 +120,22 @@ class AnakinConvertValidation {
    // should init anakin engine here.

    Singleton<AnakinOpConverter>::Global().ConvertOp(
-        desc, parameters_, scope_, engine_.get(), true /*test_mode*/);
+        desc, parameters_, *scope_, engine_.get(), true /*test_mode*/);
    engine_->Freeze();
+
+    std::map<std::string, std::vector<int>> temp_max_input_shape;
    for (const auto& input : op_desc_->InputArgumentNames()) {
      if (parameters_.count(input)) continue;
-      auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
+      auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(*scope_,
                                                                        input);
      auto t_shape = framework::vectorize2int(t.dims());
+      while (t_shape.size() < 4) {
+        t_shape.push_back(1);
+      }
      engine_->SetInputShape(input, t_shape);
+      temp_max_input_shape[input] = t_shape;
    }
+    engine_->SetMaxInputShape(temp_max_input_shape);
    engine_->Optimize();
    engine_->InitGraph();
  }
@@ -138,14 +147,14 @@ class AnakinConvertValidation {
               std::unordered_set<std::string> neglected_output = {}) {
    // Execute Fluid Op
    platform::CUDADeviceContext ctx(place_);
-    op_->Run(scope_, place_);
+    op_->Run(*scope_, place_);

    // std::vector<framework::LoDTensor> input_vector;
    // std::vector<framework::LoDTensor> output_vector;
    std::map<std::string, framework::LoDTensor*> inputs;
    for (const auto& input : op_desc_->InputArgumentNames()) {
      if (parameters_.count(input)) continue;
-      auto* var = scope_.FindVar(input);
+      auto* var = scope_->FindVar(input);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      inputs.insert({input, tensor});
    }
@@ -155,45 +164,38 @@ class AnakinConvertValidation {
    for (const auto& output : op_desc_->OutputArgumentNames()) {
      if (neglected_output.count(output)) continue;
      std::vector<float> fluid_out;
-      auto* var = scope_.FindVar(output);
+      auto* var = scope_->FindVar(output);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      framework::TensorToVector(*tensor, ctx, &fluid_out);
      fluid_outputs.push_back(fluid_out);

-      // size_t fluid_out_size = fluid_out.size();
-      /*for (size_t i = 0; i < fluid_out_size; i++) {
-        std::cout << fluid_out[i] << std::endl;
-      }*/
      outputs.insert({output, tensor});
    }

-    engine_->Execute(inputs, outputs);
+    engine_->Execute(inputs, outputs, stream_);
    int i_output = 0;
    for (const auto& output : op_desc_->OutputArgumentNames()) {
      if (neglected_output.count(output)) continue;
      std::vector<float> anakin_out;
-      auto* var = scope_.FindVar(output);
+      auto* var = scope_->FindVar(output);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      framework::TensorToVector(*tensor, ctx, &anakin_out);

      size_t anakin_out_size = anakin_out.size();
      auto fluid_out = fluid_outputs[i_output++];
      for (size_t i = 0; i < anakin_out_size; i++) {
-        LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
-                  << "fluid[" << fluid_out[i] << "]";
+        EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3);
      }
    }
  }

-  framework::Scope& scope() { return scope_; }
-
 private:
  std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
  cudaStream_t stream_;
  std::unique_ptr<framework::OperatorBase> op_;
  std::unique_ptr<framework::OpDesc> op_desc_;
  const std::unordered_set<std::string>& parameters_;
-  framework::Scope& scope_;
+  framework::Scope* scope_;
  platform::CUDAPlace place_;
 };


--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -33,9 +33,15 @@ namespace inference {
 namespace anakin {

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
-AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(bool need_summary)
+AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
+    bool need_summary, int device, int max_batch_size,
+    std::map<std::string, std::vector<int>> max_input_shape)
    : graph_(new AnakinGraphT<TargetT, PrecisionType>()),
-      net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {}
+      net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {
+  device_ = device;
+  max_batch_size_ = max_batch_size;
+  max_input_shape_ = max_input_shape;
+}

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() {}
@@ -63,34 +69,53 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
    const std::map<std::string, framework::LoDTensor *> &inputs,
-    const std::map<std::string, framework::LoDTensor *> &outputs) {
+    const std::map<std::string, framework::LoDTensor *> &outputs,
+    cudaStream_t stream) {
+  cudaDeviceSynchronize();
  for (const auto &input : inputs) {
    auto *tensor = input.second;
    auto *data = tensor->data<float>();
-    auto shape = framework::vectorize2int(tensor->dims());
-    ::anakin::saber::Shape anakin_shape(shape);
+
+    auto fluid_input_shape = framework::vectorize2int(tensor->dims());
+    while (fluid_input_shape.size() < 4) {
+      fluid_input_shape.push_back(1);
+    }
    auto *anakin_input = net_->get_in(input.first);
+    std::vector<int> max_input_shape = max_input_shape_[input.first];
+    int max_shape_sum =
+        std::accumulate(max_input_shape.begin(), max_input_shape.end(), 1,
+                        std::multiplies<int>());
+
+    PADDLE_ENFORCE(max_shape_sum >= tensor->numel(),
+                   "The anakin input max shape should be greater than"
+                   " or equal to the real input shape, Please set the max "
+                   "input shape using EnableAnakinEngine");
+    anakin_input->reshape(fluid_input_shape);
+
    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
-                                                       anakin_shape);
-    anakin_input->share_from(tmp_anakin_tensor);
+                                                       fluid_input_shape);
+    anakin_input->copy_from(tmp_anakin_tensor);
  }
-
+  net_->prediction();
+  cudaDeviceSynchronize();
  for (const auto &output : outputs) {
+    platform::CUDAPlace gpu_place(device_);
    auto *tensor = output.second;
-    auto *data = tensor->data<float>();
-    auto shape = framework::vectorize2int(tensor->dims());
-    ::anakin::saber::Shape anakin_shape(shape);
    auto *anakin_output = net_->get_out(output.first);
-    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
-                                                       anakin_shape);
-    anakin_output->share_from(tmp_anakin_tensor);
+    auto *anakin_data = anakin_output->data();
+    auto anakin_output_shape = anakin_output->valid_shape();
+    tensor->Resize(framework::make_ddim(anakin_output_shape));
+    auto *fluid_data = tensor->mutable_data<float>(gpu_place);
+    memory::Copy(gpu_place, static_cast<void *>(fluid_data), gpu_place,
+                 static_cast<void *>(anakin_data),
+                 tensor->numel() * sizeof(float), stream);
  }
-  net_->prediction();
+  cudaDeviceSynchronize();
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>
 void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
-  PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
+  PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph.");
 }

 template <typename TargetT, Precision PrecisionType, OpRunType RunType>

--- a/paddle/fluid/inference/anakin/engine.h
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -15,9 +15,11 @@
 #pragma once

 #include <algorithm>
+#include <functional>
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/engine.h"
@@ -26,8 +28,12 @@
 #include "framework/core/net/net.h"
 #include "framework/core/types.h"
 #include "framework/graph/graph.h"
+#include "framework/graph/graph_global_mem.h"
 #include "saber/saber_types.h"

+using anakin::Precision;
+using anakin::saber::NV;
+
 namespace anakin {

 template <typename, Precision, OpRunType>
@@ -46,8 +52,13 @@ namespace anakin {
 template <typename TargetT, ::anakin::Precision PrecisionType,
          ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
 class AnakinEngine {
+  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
+  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
+
 public:
-  explicit AnakinEngine(bool need_summary = false);
+  explicit AnakinEngine(
+      bool need_summary = false, int device = 0, int max_batch_size = 1,
+      std::map<std::string, std::vector<int>> max_input_shape = {});
  ~AnakinEngine();
  void InitGraph();
  void SetInputShape(const std::string &name, std::vector<int> shape);
@@ -61,20 +72,72 @@ class AnakinEngine {
    PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
                   "Add operation's attribution.");
  }
-
+  NetT *Net() { return net_.get(); }
+  GraphT *Graph() { return graph_.get(); }
  std::unique_ptr<AnakinEngine> Clone();
+  const std::map<std::string, std::vector<int>> &GetMaxInputShape() {
+    return max_input_shape_;
+  }
+  void SetMaxInputShape(std::map<std::string, std::vector<int>> shape) {
+    max_input_shape_ = shape;
+  }
+  int GetMaxBatchSize() { return max_batch_size_; }
  void Freeze();
  void Optimize();
+  void AllocTmpMem() {
+    PADDLE_ENFORCE(net_->alloc_memory_first(*graph_),
+                   "anakin alloc temp memory first failed");
+  }
+  void Save(std::string path) { graph_->save(path); }
+
+  bool IsInit() { return initialized_; }
+  int GetDevice() { return device_; }
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
-               const std::map<std::string, framework::LoDTensor *> &outputs);
+               const std::map<std::string, framework::LoDTensor *> &outputs,
+               cudaStream_t stream);

 private:
-  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
-  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
+  bool initialized_{false};
+  int max_batch_size_;
+  std::map<std::string, std::vector<int>> max_input_shape_;
+  int device_;
  std::unique_ptr<GraphT> graph_;
  std::unique_ptr<NetT> net_;
 };

+// Registry of named Anakin NV/FP32 engines.
+//
+// Every access to the engine table is serialized through `mut_`, so the
+// accessors may be called from several threads. The raw pointers handed out
+// by Get()/Create() are NOT protected: they dangle once the engine is
+// replaced by another Create() with the same name or released by DeleteALL().
+class AnakinEngineManager {
+  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+
+ public:
+  // Returns true when an engine is registered under `name` and has not been
+  // released by DeleteALL().
+  bool HasEngine(const std::string &name) const {
+    std::unique_lock<std::mutex> lk(mut_);
+    auto it = engines_.find(name);
+    return it != engines_.end() && it->second != nullptr;
+  }
+
+  // Returns the engine registered under `name`, or nullptr when it was
+  // released by DeleteALL(). Throws std::out_of_range when `name` was never
+  // registered.
+  AnakinNvEngineT *Get(const std::string &name) const {
+    std::unique_lock<std::mutex> lk(mut_);
+    return engines_.at(name).get();
+  }
+
+  // Creates a new engine from the given settings and registers it under
+  // `engine_name`, destroying any engine previously stored under that name.
+  // The manager keeps ownership; the returned pointer is only borrowed.
+  AnakinNvEngineT *Create(
+      bool need_summary, int device, int max_batch_size,
+      std::map<std::string, std::vector<int>> max_input_shape,
+      std::string engine_name) {
+    std::unique_lock<std::mutex> lk(mut_);
+    // Obtain the slot first so a failing map insertion cannot leak the engine.
+    auto &slot = engines_[engine_name];
+    slot.reset(new AnakinNvEngineT(need_summary, device, max_batch_size,
+                                   max_input_shape));
+    return slot.get();
+  }
+
+  // Destroys every engine. The names stay registered with a null engine, so
+  // HasEngine() returns false and Get() returns nullptr for them afterwards.
+  void DeleteALL() {
+    std::unique_lock<std::mutex> lk(mut_);
+    for (auto &item : engines_) {
+      item.second.reset(nullptr);
+    }
+  }
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<AnakinNvEngineT>> engines_;
+  // mutable so that the const accessors can lock it.
+  mutable std::mutex mut_;
+};
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/anakin/op_teller.cc
+++ b/paddle/fluid/inference/anakin/op_teller.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/op_teller.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Teller that decides purely by op type: an op is accepted when its type name
+// is a member of `teller_set`. The op description is not inspected.
+struct SimpleOpTypeSetTeller : public Teller {
+  SimpleOpTypeSetTeller()
+      : teller_set{"mul",
+                   "fc",
+                   "conv2d_fusion",
+                   "split",
+                   "relu",
+                   "pool2d",
+                   "elementwise_add",
+                   "elementwise_mul",
+                   "concat",
+                   "tanh",
+                   "conv2d",
+                   "batch_norm",
+                   "softmax",
+                   "flatten2",
+                   "reshape2",
+                   "transpose2",
+                   "density_prior_box",
+                   "detection_out",
+                   "dropout",
+                   "sigmoid",
+                   "sum"} {}
+
+  // Returns true when `op_type` is one of the accepted type names.
+  bool operator()(const std::string& op_type,
+                  const framework::OpDesc& desc) override {
+    return teller_set.count(op_type) != 0;
+  }
+
+ private:
+  // Type names of the ops this teller accepts.
+  std::unordered_set<std::string> teller_set;
+};
+
+// Asks the registered tellers in order and stops at the first one that
+// accepts the op. Returns false when none accepts it, which includes the case
+// of an empty teller list.
+bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) {
+  bool accepted = false;
+  for (auto it = tellers_.begin(); it != tellers_.end() && !accepted; ++it) {
+    accepted = (**it)(op_type, desc);
+  }
+  return accepted;
+}
+
+// Registers the type-name based teller as the single default teller.
+OpTeller::OpTeller() {
+  tellers_.push_back(std::unique_ptr<Teller>(new SimpleOpTypeSetTeller));
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/op_teller.h
+++ b/paddle/fluid/inference/anakin/op_teller.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+#include "paddle/fluid/framework/op_desc.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+/*
+ * Single Op teller definition.
+ * One can override this and define a more complex tell logic, considering more
+ * issues such as op_desc.
+ */
+struct Teller {
+  virtual ~Teller() = default;
+
+  // Returns true when this teller accepts the op of type `op_type` that is
+  // described by `desc`.
+  virtual bool operator()(const std::string& op_type,
+                          const framework::OpDesc& desc) = 0;
+};
+/*
+ * A real example:
+ *
+ * struct SomeTeller : public Teller {
+ * bool operator()(const std::string& op_type,
+ *                const framework::OpDesc& desc) override {
+ *  return op_type == "fc" && desc.Inputs().size() == 2;
+ * }
+ *};
+ */
+
+/*
+ * class OpTeller helps to tell whether a fluid
+ * operator can be transformed to an Anakin op.
+ */
+class OpTeller {
+ public:
+  // Returns the shared instance, which is created on first use.
+  static OpTeller& Global() {
+    static std::unique_ptr<OpTeller> instance(new OpTeller);
+    return *instance;
+  }
+
+  // Returns true when at least one registered teller accepts the op.
+  bool Tell(const std::string& op_type, const framework::OpDesc& desc);
+
+ private:
+  // Private so that instances are only created through Global().
+  OpTeller();
+
+  // Tellers consulted by Tell(), in registration order.
+  std::vector<std::unique_ptr<Teller>> tellers_;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/anakin/test_anakin_engine.cc
+++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc
@@ -17,9 +17,6 @@ limitations under the License. */

 #include <map>

-#include "framework/core/net/net.h"
-#include "framework/graph/graph.h"
-#include "framework/graph/graph_global_mem.h"
 #include "paddle/fluid/inference/anakin/engine.h"

 using anakin::graph::GraphGlobalMem;
@@ -84,7 +81,9 @@ TEST_F(TestAnakinEngine, Execute) {
  auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
  std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};

-  engine_->Execute(inputs, outputs);
+  // Default (null) stream; an uninitialized handle is undefined behavior.
+  cudaStream_t stream = nullptr;
+  engine_->Execute(inputs, outputs, stream);
  auto *y_data_gpu = y_data;
  float y_data_cpu[2];
  cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);

--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -23,6 +23,7 @@

 #pragma once

+#include <map>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -58,6 +59,8 @@ struct Argument {

  using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
  using fusion_statis_t = std::unordered_map<std::string, int>;
+  using engine_opt_info_t = std::map<std::string, std::string>;
+  using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

  bool Has(const std::string& key) const { return valid_fields_.count(key); }

@@ -110,12 +113,14 @@ struct Argument {
 private:                                                                 \
  unique_ptr_t field__##_;

+  DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
  // Model path
  DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
  // Model specified with program and parameters files.
  DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
  DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
  DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
+  DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);

  // The overall graph to work on.
  DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
@@ -160,6 +165,11 @@ struct Argument {
  DECL_ARGUMENT_FIELD(tensorrt_use_static_engine, TensorRtUseStaticEngine,
                      bool);

+  DECL_ARGUMENT_FIELD(anakin_max_input_shape, AnakinMaxInputShape,
+                      anakin_max_shape_t);
+  DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
+  DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
+
  // Memory optimized related.
  DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
  DECL_ARGUMENT_FIELD(static_memory_optim, StaticMemoryOptim, bool);

--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -13,9 +13,12 @@
 // limitations under the License.

 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
+#include <map>
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
+#include <utility>
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
@@ -85,16 +88,40 @@ void IRPassManager::CreatePasses(Argument *argument,
                         AnalysisConfig::Precision::kInt8;

      pass->Set("enable_int8", new bool(enable_int8));
-      std::string model_opt_cache_dir =
-          argument->Has("model_dir")
-              ? argument->model_dir()
-              : GetDirRoot(argument->model_program_path());
-      pass->Set(
-          "model_opt_cache_dir",
-          new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
+
+      bool use_static_engine = argument->tensorrt_use_static_engine();
+      bool model_from_memory = argument->model_from_memory();
+      bool int8_valid = !(model_from_memory && enable_int8);
+      PADDLE_ENFORCE(int8_valid,
+                     "TRT INT8 Now don't support model load from memory.");
+
+      if ((!model_from_memory && use_static_engine) || enable_int8) {
+        std::string model_opt_cache_dir =
+            argument->Has("model_dir")
+                ? argument->model_dir()
+                : GetDirRoot(argument->model_program_path());
+        pass->Set(
+            "model_opt_cache_dir",
+            new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
+      }
+      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
+      pass->Set("use_static_engine", new bool(use_static_engine));
+      pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
+                                       argument->engine_opt_info()));
+    }
+
+    if (pass_name == "anakin_subgraph_pass") {
+      pass->Set("program",
+                new framework::ProgramDesc *(&argument->main_program()));
      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
-      pass->Set("use_static_engine",
-                new bool(argument->tensorrt_use_static_engine()));
+      pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("engine_opt_info", new std::map<std::string, std::string>(
+                                       argument->engine_opt_info()));
+      pass->Set("predictor_id", new int(argument->predictor_id()));
+      pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
+                                       argument->anakin_max_input_shape()));
+      pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
    }

    pre_pass = pass_name;

--- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
-cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc)
+cc_library(subgraph_detector SRCS subgraph_detector.cc subgraph_util.cc DEPS proto_desc)
 if(WITH_TESTING)
  add_dependencies(subgraph_detector gtest)
 endif()
@@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND)
  file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n")
  set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
 endif()
+
+# Build and register the Anakin sub-graph pass only when ANAKIN_FOUND is set.
+if(ANAKIN_FOUND)
+  cc_library(anakin_subgraph_pass
+    SRCS anakin_subgraph_pass.cc
+    DEPS subgraph_detector anakin_op_teller)
+
+  # Add the pass and the detector it depends on to the analysis dependencies.
+  set(analysis_deps
+    ${analysis_deps} subgraph_detector anakin_subgraph_pass
+    CACHE INTERNAL "")
+
+  # Append a USE_PASS entry for the pass to the pass header in the build tree.
+  set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
+  file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n")
+  set(INFER_IR_PASSES
+    ${INFER_IR_PASSES} anakin_subgraph_pass
+    CACHE INTERNAL "")
+endif()
--- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/op_teller.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+using framework::ir::Node;
+
+// Detects the sub-graphs made of ops accepted by the Anakin OpTeller, turns
+// each of them into one anakin_engine op, removes the nodes that were fused
+// or marked as deleted, and records on the graph the parameters that were
+// handed to an engine. Returns the rewritten graph.
+std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
+    std::unique_ptr<framework::ir::Graph> graph) const {
+  framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get());
+
+  // A node may join a sub-graph only when it is an op that carries an op
+  // description accepted by the global Anakin OpTeller.
+  auto is_convertible = [](const framework::ir::Node *candidate) {
+    if (!candidate->IsOp() || !candidate->Op()) return false;
+    return anakin::OpTeller::Global().Tell(candidate->Op()->Type(),
+                                           *candidate->Op());
+  };
+
+  SubGraphFuser fuser(graph.get(), is_convertible, 6 /* min_subgraph_size */);
+  fuser();
+
+  std::vector<std::string> graph_param_names =
+      ExtractParameters(graph->Nodes());
+
+  // Parameters that already live inside an Anakin engine; fluid should not
+  // keep another copy of them.
+  std::vector<std::string> repetitive_params;
+
+  // Convert every op node that carries a non-empty sub-graph, then drop the
+  // nodes that were fused into it.
+  for (auto *node : graph->Nodes()) {
+    if (!node->IsOp() || Agent(node).subgraph()->empty()) continue;
+    CreateAnakinOp(node, graph.get(), graph_param_names, &repetitive_params);
+    std::unordered_set<const Node *> fused_nodes(
+        Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
+    framework::ir::GraphSafeRemoveNodes(graph.get(), fused_nodes);
+  }
+
+  // Sweep the op nodes that are flagged as deleted.
+  std::unordered_set<const Node *> deleted_ops;
+  for (auto *node : graph->Nodes()) {
+    const bool is_deleted_op = node->IsOp() && Agent(node).deleted();
+    if (is_deleted_op) deleted_ops.insert(node);
+  }
+  framework::ir::GraphSafeRemoveNodes(graph.get(), deleted_ops);
+
+  graph->Set(framework::ir::kRepetitiveParamAttr,
+             new std::vector<std::string>(repetitive_params));
+
+  return graph;
+}
+
+// Builds the key that identifies an Anakin engine. All input names, then all
+// output names (each in the order of its set), then `id` are concatenated
+// without separators; the std::hash of that string is returned in decimal.
+std::string GenerateAnakinEngineKey(const std::set<std::string> &engine_inputs,
+                                    const std::set<std::string> &engine_outputs,
+                                    std::string id) {
+  std::string joined;
+  for (const auto &input_name : engine_inputs) {
+    joined.append(input_name);
+  }
+  for (const auto &output_name : engine_outputs) {
+    joined.append(output_name);
+  }
+  joined.append(id);
+  const auto digest = std::hash<std::string>()(joined);
+  return std::to_string(digest);
+}
+
+// Rewrites `node`, which carries a fused sub-graph, into one "anakin_engine"
+// op and builds the Anakin engine that executes that sub-graph.
+//
+// node              - op node holding the sub-graph; its OpDesc is rewritten
+//                     in place (inputs "Xs", outputs "Ys", engine attributes).
+// graph             - graph that owns `node`; the descriptions of its var
+//                     nodes are copied into the serialized sub-graph block.
+// graph_params      - parameter names of the graph; inputs of `node` found
+//                     here are passed to the engine as parameters.
+// repetitive_params - out: the parameters collected for this op are appended.
+//
+// Reads the pass attributes "program", "predictor_id", "max_input_shape",
+// "max_batch_size" and "gpu_device_id". Enforces that the sub-graph, the
+// copied var list and the output mapping are not empty.
+void AnakinSubgraphPass::CreateAnakinOp(
+    framework::ir::Node *node, Graph *graph,
+    const std::vector<std::string> &graph_params,
+    std::vector<std::string> *repetitive_params) const {
+  auto *op_desc = node->Op();
+  auto &subgraph = *Agent(node).subgraph();
+  PADDLE_ENFORCE(!subgraph.empty());
+
+  framework::ProgramDesc *program_desc =
+      Get<framework::ProgramDesc *>("program");
+  // Append a new block to the program for the anakin_engine op; it becomes
+  // the op's "sub_block" attribute below.
+  const framework::BlockDesc &main_block =
+      program_desc->Block(framework::kRootBlockIndex);
+  framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);
+
+  // A stand-alone block desc, not attached to any program. It is serialized
+  // into the "subgraph" attribute and used to build the engine.
+  framework::proto::BlockDesc block_proto;
+  framework::BlockDesc block_desc(nullptr, &block_proto);
+  block_desc.Proto()->set_parent_idx(-1);
+  block_desc.Proto()->set_idx(0);
+  string::PrettyLogDetail("---  detect a sub-graph with %d nodes",
+                          subgraph.size());
+
+  // Copy every op of the sub-graph into both blocks.
+  for (auto *sub_node : subgraph) {
+    auto *new_block_op = new_block->AppendOp();
+    auto *op = block_desc.AppendOp();
+    *new_block_op->Proto() = *sub_node->Op()->Proto();
+    *op->Proto() = *sub_node->Op()->Proto();
+  }
+
+  // input_names_with_id and output_names_with_id are used to generate the
+  // engine key. std::set (not unordered_set) keeps the names in a fixed
+  // order, so the same sub-graph always yields the same key.
+  std::set<std::string> input_names;
+  std::set<std::string> input_names_with_id;
+  std::vector<std::string> params;
+  for (auto *x : node->inputs) {
+    input_names.insert(x->Name());
+    input_names_with_id.insert(x->Name() + std::to_string(x->id()));
+    if (std::find(graph_params.begin(), graph_params.end(), x->Name()) !=
+        graph_params.end()) {
+      params.push_back(x->Name());
+    }
+  }
+  repetitive_params->insert(repetitive_params->end(), params.begin(),
+                            params.end());
+  op_desc->SetInput(
+      "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));
+
+  std::set<std::string> output_names;
+  std::set<std::string> output_names_with_id;
+  for (auto *x : node->outputs) {
+    output_names.insert(x->Name());
+    output_names_with_id.insert(x->Name() + std::to_string(x->id()));
+  }
+
+  op_desc->SetOutput(
+      "Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
+  op_desc->SetType("anakin_engine");
+
+  std::unordered_map<std::string, std::string> output_name_map;
+
+  // Rename all the intermediate variables and the output variables of the
+  // sub-graph.
+  RenameAndGetOutputs(subgraph, &block_desc, input_names_with_id,
+                      &output_names_with_id, &output_names, &output_name_map,
+                      false);
+
+  // output_mapping lists, for every output of the op, the renamed variable
+  // that holds its value inside the engine.
+  std::vector<std::string> output_mapping;
+  for (const auto &name : output_names) {
+    PADDLE_ENFORCE(output_name_map.count(name) != 0);
+    output_mapping.push_back(output_name_map[name]);
+  }
+
+  // Copy the description of every var node of the graph into the block.
+  auto *vars = block_desc.Proto()->mutable_vars();
+  for (framework::ir::Node *graph_node : graph->Nodes()) {
+    if (graph_node->IsVar() && graph_node->Var()) {
+      *vars->Add() = *graph_node->Var()->Proto();
+    }
+  }
+
+  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
+                 "the block has no var-desc");
+  PADDLE_ENFORCE(!output_mapping.empty());
+  op_desc->SetBlockAttr("sub_block", new_block);
+  SetAttr(op_desc->Proto(), "subgraph",
+          block_desc.Proto()->SerializeAsString());
+  // Set attrs
+  SetAttr(op_desc->Proto(), "parameters", params);
+  SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);
+  int predictor_id = Get<int>("predictor_id");
+  auto engine_key = GenerateAnakinEngineKey(
+      input_names_with_id, output_names_with_id, std::to_string(predictor_id));
+
+  SetAttr(op_desc->Proto(), "engine_key", engine_key);
+  auto max_input_shape =
+      Get<std::map<std::string, std::vector<int>>>("max_input_shape");
+  auto max_batch_size = Get<int>("max_batch_size");
+
+  // The engine is owned by the global manager and registered under
+  // `engine_key`, the same key stored in the op's "engine_key" attribute.
+  auto *anakin_engine =
+      inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
+          true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
+          engine_key);
+
+  auto *scope = param_scope();
+  std::unordered_set<std::string> param_set(params.begin(), params.end());
+  framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
+
+  inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
+      .ConvertBlockToAnakinEngine(
+          &block_desc_temp, scope,
+          std::vector<std::string>(input_names.begin(), input_names.end()),
+          param_set, output_mapping, anakin_engine);
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_PASS(anakin_subgraph_pass,
+              paddle::inference::analysis::AnakinSubgraphPass);
--- a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <paddle/fluid/framework/ir/fuse_pass_base.h>
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/inference/anakin/engine.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
+
+using anakin::Precision;
+using anakin::saber::NV;
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+// Graph pass that replaces sub-graphs of Anakin-convertible ops with
+// anakin_engine ops.
+class AnakinSubgraphPass : public framework::ir::FusePassBase {
+ public:
+  // Runs the pass on `graph` and returns the rewritten graph.
+  std::unique_ptr<framework::ir::Graph> ApplyImpl(
+      std::unique_ptr<framework::ir::Graph> graph) const override;
+
+ private:
+  // Rewrites the op node `x`, which carries a fused sub-graph, into an
+  // anakin_engine op. Parameters of `x` that are listed in `graph_params` are
+  // appended to `repetitive_params`.
+  void CreateAnakinOp(framework::ir::Node *x, framework::ir::Graph *graph,
+                      const std::vector<std::string> &graph_params,
+                      std::vector<std::string> *repetitive_params) const;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
--- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
--- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
+++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
@@ -20,6 +20,7 @@
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"

 namespace paddle {
 namespace inference {

--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
--- a/paddle/fluid/inference/tensorrt/convert/io_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
--- a/paddle/fluid/inference/utils/singleton.h
+++ b/paddle/fluid/inference/utils/singleton.h
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
--- a/paddle/fluid/operators/anakin/CMakeLists.txt
+++ b/paddle/fluid/operators/anakin/CMakeLists.txt
--- a/paddle/fluid/operators/anakin/anakin_engine_op.cc
+++ b/paddle/fluid/operators/anakin/anakin_engine_op.cc
--- a/paddle/fluid/operators/anakin/anakin_engine_op.h
+++ b/paddle/fluid/operators/anakin/anakin_engine_op.h
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h