From 662e4d7c660e63435e9f1e48af44cf84e3c68ea8 Mon Sep 17 00:00:00 2001
From: "cen.li" <cen.li@bitmain.com>
Date: Fri, 6 Dec 2019 10:20:42 +0800
Subject: [PATCH] * change to bridge way * fix code_style test=develop

---
 lite/api/test_resnet50_lite_bm.cc             |   4 +-
 lite/backends/bm/CMakeLists.txt               |   2 +-
 lite/backends/bm/bm_context.cc                |  28 +++
 lite/backends/bm/target_wrapper.cc            |  10 +-
 lite/core/context.h                           |  11 +-
 lite/core/memory.cc                           |  16 ++
 lite/core/memory.h                            |   9 +
 lite/core/mir/static_kernel_pick_pass.cc      |   2 +-
 .../mir/subgraph/generate_bm_program_pass.cc  | 183 ++++++++++++++++++
 .../mir/subgraph/generate_bm_program_pass.h   |  60 ++++++
 lite/kernels/bm/CMakeLists.txt                |  14 +-
 lite/kernels/bm/batch_norm_compute.cc         |  75 -------
 lite/kernels/bm/bridges/CMakeLists.txt        |  23 +++
 .../bm/{conv_compute.h => bridges/act_op.cc}  |  32 ++-
 .../batch_norm_op.cc}                         |  36 ++--
 lite/kernels/bm/bridges/conv_op.cc            |  36 ++++
 lite/kernels/bm/bridges/elementwise_ops.cc    |  36 ++++
 lite/kernels/bm/bridges/mul_op.cc             |  36 ++++
 lite/kernels/bm/bridges/pool_op.cc            |  36 ++++
 .../{calib_compute.h => bridges/registry.cc}  |  38 ++--
 lite/kernels/bm/bridges/registry.h            |  85 ++++++++
 lite/kernels/bm/bridges/scale_op.cc           |  36 ++++
 lite/kernels/bm/bridges/softmax_op.cc         |  36 ++++
 lite/kernels/bm/calib_compute.cc              |  76 --------
 lite/kernels/bm/conv_compute.cc               |  71 -------
 lite/kernels/bm/elementwise_compute.cc        |  56 ------
 ...{batch_norm_compute.h => graph_compute.cc} |  46 +++--
 ...{elementwise_compute.h => graph_compute.h} |  23 ++-
 lite/kernels/bm/io_copy_compute.cc            | 167 ----------------
 lite/kernels/bm/mul_compute.cc                |  61 ------
 lite/kernels/bm/pool_compute.cc               |  59 ------
 lite/kernels/bm/pool_compute.h                |  50 -----
 lite/kernels/bm/relu_compute.cc               |  59 ------
 lite/kernels/bm/relu_compute.h                |  50 -----
 lite/kernels/bm/scale_compute.cc              |  59 ------
 lite/kernels/bm/scale_compute.h               |  50 -----
 lite/kernels/bm/softmax_compute.cc            |  59 ------
 lite/kernels/bm/softmax_compute.h             |  50 -----
 38 files changed, 710 insertions(+), 1070 deletions(-)
 create mode 100644 lite/backends/bm/bm_context.cc
 create mode 100644 lite/core/mir/subgraph/generate_bm_program_pass.cc
 create mode 100644 lite/core/mir/subgraph/generate_bm_program_pass.h
 delete mode 100644 lite/kernels/bm/batch_norm_compute.cc
 create mode 100644 lite/kernels/bm/bridges/CMakeLists.txt
 rename lite/kernels/bm/{conv_compute.h => bridges/act_op.cc} (60%)
 rename lite/kernels/bm/{mul_compute.h => bridges/batch_norm_op.cc} (56%)
 create mode 100644 lite/kernels/bm/bridges/conv_op.cc
 create mode 100644 lite/kernels/bm/bridges/elementwise_ops.cc
 create mode 100644 lite/kernels/bm/bridges/mul_op.cc
 create mode 100644 lite/kernels/bm/bridges/pool_op.cc
 rename lite/kernels/bm/{calib_compute.h => bridges/registry.cc} (60%)
 create mode 100644 lite/kernels/bm/bridges/registry.h
 create mode 100644 lite/kernels/bm/bridges/scale_op.cc
 create mode 100644 lite/kernels/bm/bridges/softmax_op.cc
 delete mode 100644 lite/kernels/bm/calib_compute.cc
 delete mode 100644 lite/kernels/bm/conv_compute.cc
 delete mode 100644 lite/kernels/bm/elementwise_compute.cc
 rename lite/kernels/bm/{batch_norm_compute.h => graph_compute.cc} (53%)
 rename lite/kernels/bm/{elementwise_compute.h => graph_compute.h} (69%)
 delete mode 100644 lite/kernels/bm/io_copy_compute.cc
 delete mode 100644 lite/kernels/bm/mul_compute.cc
 delete mode 100644 lite/kernels/bm/pool_compute.cc
 delete mode 100644 lite/kernels/bm/pool_compute.h
 delete mode 100644 lite/kernels/bm/relu_compute.cc
 delete mode 100644 lite/kernels/bm/relu_compute.h
 delete mode 100644 lite/kernels/bm/scale_compute.cc
 delete mode 100644 lite/kernels/bm/scale_compute.h
 delete mode 100644 lite/kernels/bm/softmax_compute.cc
 delete mode 100644 lite/kernels/bm/softmax_compute.h

diff --git a/lite/api/test_resnet50_lite_bm.cc b/lite/api/test_resnet50_lite_bm.cc
index 8b7cb5242e..0e34e80573 100644
--- a/lite/api/test_resnet50_lite_bm.cc
+++ b/lite/api/test_resnet50_lite_bm.cc
@@ -35,7 +35,7 @@ void TestModel(const std::vector<Place>& valid_places) {
   //DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
   lite::Predictor predictor;
   predictor.Build(FLAGS_model_dir, "", "", valid_places);
-#if 0
+
   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
   auto* data = input_tensor->mutable_data<float>();
@@ -53,7 +53,6 @@ void TestModel(const std::vector<Place>& valid_places) {
       fs >> data[i];
     }
   }
-
   for (int i = 0; i < FLAGS_warmup; ++i) {
     predictor.Run();
   }
@@ -102,7 +101,6 @@ void TestModel(const std::vector<Place>& valid_places) {
     }
   }
   LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
-#endif
 }
 
 TEST(ResNet50, test_bm) {
diff --git a/lite/backends/bm/CMakeLists.txt b/lite/backends/bm/CMakeLists.txt
index 9e15b9836b..fc0dd3acb3 100644
--- a/lite/backends/bm/CMakeLists.txt
+++ b/lite/backends/bm/CMakeLists.txt
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
     return()
 endif()
 
-lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc DEPS ${bm_runtime_libs})
+lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc bm_context.cc DEPS ${bm_runtime_libs})
diff --git a/lite/backends/bm/bm_context.cc b/lite/backends/bm/bm_context.cc
new file mode 100644
index 0000000000..46b825136b
--- /dev/null
+++ b/lite/backends/bm/bm_context.cc
@@ -0,0 +1,28 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/core/context.h"
+#include "bmcompiler_if.h"
+
+namespace paddle {
+namespace lite {
+
+static const char* const CHIP_NAME = "BM1684";  // chip name given to bmcompiler
+// Creates the BM compiler handle for CHIP_NAME; CHECK-fails if none is returned.
+void BMContext::InitOnce() {
+  compiler_handle_ = create_bmcompiler(CHIP_NAME);
+  CHECK(compiler_handle_ != nullptr) << "create_bmcompiler failed";
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/backends/bm/target_wrapper.cc b/lite/backends/bm/target_wrapper.cc
index 9f440b1846..489bbbd7b0 100644
--- a/lite/backends/bm/target_wrapper.cc
+++ b/lite/backends/bm/target_wrapper.cc
@@ -14,13 +14,14 @@
 #include <map>
 #include "lite/backends/bm/target_wrapper.h"
 #include "bmlib_runtime.h"
+#include "bmcompiler_if.h"
 
 namespace paddle {
 namespace lite {
 
 static int g_current_device_id = 0;
 static std::map<int, bm_handle_t> g_bm_handles;
-    
+
 size_t TargetWrapperBM::num_devices() {
   int count = 0;
   bm_dev_getcount(&count);
@@ -32,7 +33,8 @@ void TargetWrapperBM::SetDevice(int id) {
 
   if (g_bm_handles.find(id) == g_bm_handles.end()) {
     bm_handle_t bm_handle;
-    bm_dev_request(&bm_handle, id);
+    bm_status_t ret = bm_dev_request(&bm_handle, id);
+    CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: " << (int)ret;
     g_bm_handles.insert(std::pair<int, bm_handle_t>(id, bm_handle));
   }
   return;
@@ -41,6 +43,10 @@ void TargetWrapperBM::SetDevice(int id) {
 void* TargetWrapperBM::Malloc(size_t size) {
   void* ptr{};
 
+  if (g_bm_handles.find(g_current_device_id) == g_bm_handles.end()) {
+      SetDevice(g_current_device_id);
+  } 
+
   bm_handle_t bm_handle = g_bm_handles.at(g_current_device_id);
   bm_device_mem_t* p_mem = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t));
   bm_malloc_device_byte(bm_handle, p_mem, size);
diff --git a/lite/core/context.h b/lite/core/context.h
index 796533664a..19238f1a9b 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -96,11 +96,17 @@ class Context<TargetType::kBM> {
   Context() {}
   explicit Context(const BMContext& ctx);
   // NOTE: InitOnce should only be used by ContextScheduler
-  void InitOnce() {}
+  void InitOnce();
   void CopySharedTo(BMContext* ctx) {}
 
   std::string name() const { return "BMContext"; }
-  };
+  // Returns the opaque BM compiler handle; it stays nullptr until InitOnce()
+  // has stored one.
+  void* compiler_handle() { return compiler_handle_; }
+
+ private:
+  void* compiler_handle_{nullptr};
+};
 #endif
 
 #ifdef LITE_WITH_XPU
@@ -340,7 +346,6 @@ class ContextScheduler {
   std::unique_ptr<KernelContext> NewContext(TargetType target) {
     std::unique_ptr<KernelContext> ctx(new KernelContext);
 
-    LOG(INFO) << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa " << int(target) << " " << int(TARGET(kBM));
     switch (target) {
       case TARGET(kHost):
         kernel_contexts_[TargetType::kHost].As<HostContext>().CopySharedTo(
diff --git a/lite/core/memory.cc b/lite/core/memory.cc
index b3cb18b336..ec94f69be1 100644
--- a/lite/core/memory.cc
+++ b/lite/core/memory.cc
@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
       data = TargetWrapper<TARGET(kFPGA)>::Malloc(size);
       break;
 #endif  // LITE_WITH_OPENCL
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      data = TargetWrapper<TARGET(kBM)>::Malloc(size);
+      break;
+#endif      
     default:
       LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
   }
@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
       TargetWrapper<TARGET(kFPGA)>::Free(data);
       break;
 #endif  // LITE_WITH_CUDA
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      TargetWrapper<TARGET(kBM)>::Free(data);
+      break;
+#endif
     default:
       LOG(FATAL) << "Unknown type";
   }
@@ -95,6 +105,12 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
           dst, src, size, IoDirection::DtoD);
       break;
 #endif
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      TargetWrapper<TARGET(kBM)>::MemcpySync(
+          dst, src, size, IoDirection::DtoD);
+      break;
+#endif 
 #ifdef LITE_WITH_OPENCL
     case TargetType::kOpenCL:
       TargetWrapperCL::MemcpySync(dst, src, size, IoDirection::DtoD);
diff --git a/lite/core/memory.h b/lite/core/memory.h
index cb4ac044e7..4e486d391c 100644
--- a/lite/core/memory.h
+++ b/lite/core/memory.h
@@ -25,6 +25,10 @@
 #include "lite/backends/cuda/target_wrapper.h"
 #endif  // LITE_WITH_CUDA
 
+#ifdef LITE_WITH_BM
+#include "lite/backends/bm/target_wrapper.h" 
+#endif  // LITE_WITH_BM
+
 namespace paddle {
 namespace lite {
 
@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
     case TARGET(kFPGA):
       TargetWrapper<TARGET(kFPGA)>::MemcpySync(dst, src, size, dir);
       break;
+#endif
+#ifdef LITE_WITH_BM
+    case TARGET(kBM):
+      TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
+      break;
 #endif
   }
 }
diff --git a/lite/core/mir/static_kernel_pick_pass.cc b/lite/core/mir/static_kernel_pick_pass.cc
index 90aca56aec..3384a15de9 100644
--- a/lite/core/mir/static_kernel_pick_pass.cc
+++ b/lite/core/mir/static_kernel_pick_pass.cc
@@ -33,6 +33,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   kernel_pick_factors_.ConsiderTarget();
   kernel_pick_factors_.ConsiderPrecision();
   kernel_pick_factors_.ConsiderDataLayout();
+
   CHECK(kernel_pick_factors_.any_factor_considered())
       << "kernel_pick_factors should be specified first";
   CHECK(graph) << "graph not valid";
@@ -114,7 +115,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
         bool all_output_type_match = true;
         auto expect_output_type =
             out_type_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
-
         for (auto& arg_name : output_arguments) {
           const Type* out_arg_ty =
               candidate.second->GetOutputDeclType(arg_name);
diff --git a/lite/core/mir/subgraph/generate_bm_program_pass.cc b/lite/core/mir/subgraph/generate_bm_program_pass.cc
new file mode 100644
index 0000000000..b5cdc749d3
--- /dev/null
+++ b/lite/core/mir/subgraph/generate_bm_program_pass.cc
@@ -0,0 +1,183 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/mir/subgraph/generate_bm_program_pass.h"
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include "lite/core/mir/graph_visualize_pass.h"
+#include "lite/core/mir/pass_registry.h"
+#include "lite/core/mir/pattern_matcher.h"
+
+namespace paddle {
+namespace lite {
+namespace mir {
+namespace subgraph {
+// Converts a var node to a ge operator: Const for weights, Data otherwise.
+std::shared_ptr<ge::Operator> GenerateBMProgramPass::CvtVarNode(
+    lite::mir::Node* var_node, const Scope* scope) {
+  CHECK(var_node->IsArg());
+  const auto& arg = var_node->AsArg();
+  VLOG(4) << "Convert var node " << arg.name;
+  // NOTE(review): ge:: and lite::npu:: are NPU-backend APIs; confirm for BM.
+  auto* var = scope->FindVar(arg.name);
+  CHECK(var);
+  auto* tensor = var->GetMutable<lite::Tensor>();
+  CHECK(tensor);
+  auto dims = tensor->dims();
+  if (arg.is_weight) {
+    auto wgt = std::make_shared<ge::op::Const>(arg.name);
+    LOG(INFO) << " Convert const var node " << arg.name;
+    VLOG(4) << dims;
+    wgt->set_attr_value(lite::npu::CvtTensor(tensor));
+    return wgt;
+  } else {
+    CHECK_EQ(dims.size(), 4);  // data inputs must be 4-D (NCHW desc below)
+    LOG(INFO) << "[NPU] Convert data var node " << arg.name;
+    LOG(INFO) << dims;
+    // TODO(xxx): support more types and dims size
+    ge::TensorDesc desc(ge::Shape(dims.Vectorize()),
+                        ge::Format::FORMAT_NCHW,
+                        ge::DataType::DT_FLOAT);
+
+    //   auto size = desc.GetShape().GetShapeSize();
+    //  ge::TensorUtils::SetSize(desc, size*sizeof(float));
+    //  ge::TensorUtils::SetRealDimCnt(desc, 4);
+    auto data = std::make_shared<ge::op::Data>(arg.name);
+    data->update_input_desc_x(desc);
+    return data;
+  }
+  return nullptr;  // not reached: both branches above return
+}
+// Converts each op node in order, recording produced vars in converted_vars.
+void GenerateBMProgramPass::CvtAllOpNodes(
+    const std::vector<Node*>& nodes2cvt,
+    lite::kernels::npu::bridges::node_map_type* converted_vars) {
+  const auto& bridges = lite::kernels::npu::bridges::Factory::Instance();
+  const auto& cvtfunc_map = bridges.AllFunctions();
+  // converted_vars accumulates every var converted so far; an op's non-weight
+  // inputs are looked up there and converted on first use.
+  for (auto& node : nodes2cvt) {
+    lite::kernels::npu::bridges::node_map_type node_inputs;
+    auto& stmt = node->AsStmt();
+    for (auto& var_node : node->inlinks) {
+      auto& arg = var_node->AsArg();
+      // weight should be handled in the converter, so skip here
+      if (arg.is_weight) {
+        continue;
+      }
+      auto var_name = arg.name;
+      if (!converted_vars->count(var_name)) {
+        converted_vars->insert(
+            std::make_pair(var_name, CvtVarNode(var_node, stmt.op()->scope())));
+      }
+      node_inputs.insert(*converted_vars->find(var_name));
+    }
+    auto node_outputs = cvtfunc_map.at(stmt.op_type())(stmt.op(), node_inputs);
+    converted_vars->insert(node_outputs.begin(), node_outputs.end());
+  }
+}
+// Builds the subgraph model into a persistable tensor; returns its var name.
+std::string GenerateBMProgramPass::BuildGraph(
+    const std::unordered_set<Node*>& op_nodes,
+    const std::unordered_set<Node*>& in_data_vars,
+    const std::unordered_set<Node*>& out_data_vars,
+    int sub_id) {
+  auto ordered_nodes = GetTopologicalOrder(op_nodes);
+  lite::kernels::npu::bridges::node_map_type converted_vars;
+  CvtAllOpNodes(ordered_nodes, &converted_vars);
+
+  std::vector<std::string> in_var_names;
+  std::vector<std::string> out_var_names;
+  std::vector<ge::Operator> inputs;
+  std::vector<ge::Operator> outputs;
+  for (auto i : in_data_vars) {
+    auto argname = i->AsArg().name;
+    in_var_names.push_back(argname);
+    inputs.push_back(*converted_vars.at(argname));
+  }
+  for (auto i : out_data_vars) {
+    auto argname = i->AsArg().name;
+    out_var_names.push_back(argname);
+    outputs.push_back(*converted_vars.at(argname));
+  }
+
+  std::string weight_var_name = "graph" + std::to_string(sub_id) + "_weights";
+  auto any_op = (*op_nodes.begin())->AsStmt().op();
+  auto weight = any_op->scope()->Var(weight_var_name)->GetMutable<Tensor>();
+  weight->set_persistable(true);
+  weight->set_precision(PRECISION(kInt8));
+  // Compile the IR graph into an NPU model and store the model data in the
+  // weight tensor with persistable=true, so that the model parser can
+  // recognize it and save it to the param files.
+  if (!lite::npu::BuildModel(inputs, outputs, weight)) {
+    LOG(WARNING) << "[NPU] Build NPU graph failed (subgraph=" << sub_id << ")";
+    throw std::runtime_error("Build NPU graph failed.");
+  }
+  LOG(INFO) << "[NPU] Build NPU graph success (subgraph=" << sub_id << ")";
+  return weight_var_name;
+}
+// Currently a no-op: the whole body below is compiled out with #if 0.
+void GenerateBMProgramPass::GenSubgraph(
+    const std::unique_ptr<SSAGraph>& graph,
+    const std::unordered_set<Node*>& op_nodes,
+    int sub_id) {
+#if 0  // NOTE(review): calls BuildNPUGraph, but the header declares BuildGraph
+  std::unordered_set<Node*> in_data_vars;
+  std::unordered_set<Node*> in_wgt_vars;
+  std::unordered_set<Node*> out_data_vars;
+  std::unordered_set<Node*> out_unused_vars;
+  FindInputOutputVars(
+      op_nodes, &in_data_vars, &in_wgt_vars, &out_data_vars, &out_unused_vars);
+
+  auto weight_var_name =
+      BuildNPUGraph(op_nodes, in_data_vars, out_data_vars, sub_id);
+
+  auto any_op = (*op_nodes.begin())->AsStmt().op();
+  InsertNewNode(graph,
+                weight_var_name,
+                any_op->scope(),
+                any_op->valid_places(),
+                in_data_vars,
+                in_wgt_vars,
+                out_data_vars,
+                out_unused_vars);
+
+  auto nodes2rm = GetNode2rm(
+      op_nodes, {in_data_vars, in_wgt_vars, out_data_vars, out_unused_vars});
+
+  GraphSafeRemoveNodes(graph.get(), nodes2rm);
+#endif
+}
+
+void GenerateBMProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
+  // Empty body: the pass currently leaves the graph untouched.
+}
+
+// Returns a new RuntimeProgram constructed from the moved-out insts_.
+std::unique_ptr<RuntimeProgram> GenerateBMProgramPass::GenProgram() {
+  return std::unique_ptr<RuntimeProgram>(
+      new RuntimeProgram(std::move(insts_)));
+}
+
+}  // namespace subgraph
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_MIR_PASS(generate_bm_program_pass,
+                  paddle::lite::mir::subgraph::GenerateBMProgramPass)
+    .BindTargets({TARGET(kBM)});
diff --git a/lite/core/mir/subgraph/generate_bm_program_pass.h b/lite/core/mir/subgraph/generate_bm_program_pass.h
new file mode 100644
index 0000000000..36f12000f6
--- /dev/null
+++ b/lite/core/mir/subgraph/generate_bm_program_pass.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "lite/core/context.h"
+#include "lite/core/mir/pass.h"
+#include "lite/core/mir/subgraph/subgraph_program_pass.h"
+
+namespace paddle {
+namespace lite {
+namespace mir {
+namespace subgraph {
+// MIR subgraph pass bound to TARGET(kBM) by REGISTER_MIR_PASS in the .cc file.
+class GenerateBMProgramPass : public SubgraphProgramPass {
+ public:
+  using key2nodes_t = std::map<std::string, Node*>;
+
+  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
+  std::unique_ptr<RuntimeProgram> GenProgram();  // moves insts_ into a program
+
+ protected:
+  // Converts the op nodes in nodes2cvt; the converted var nodes are returned
+  // through cvted_vars.
+  void CvtAllOpNodes(const std::vector<Node*>& nodes2cvt,
+                     lite::kernels::npu::bridges::node_map_type* cvted_vars);
+  // Converts one var node, looked up by name in scope, to a ge operator.
+  std::shared_ptr<ge::Operator> CvtVarNode(lite::mir::Node* var_node,
+                                           const Scope* scope);
+  // Builds the graph for subgraph sub_id from its op and data var nodes.
+  std::string BuildGraph(const std::unordered_set<Node*>& op_nodes,
+                         const std::unordered_set<Node*>& in_data_vars,
+                         const std::unordered_set<Node*>& out_data_vars,
+                         int sub_id);
+
+ private:
+  std::vector<Instruction> insts_;  // moved out by GenProgram()
+};
+
+}  // namespace subgraph
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/kernels/bm/CMakeLists.txt b/lite/kernels/bm/CMakeLists.txt
index 24fca1f797..4ae8392736 100644
--- a/lite/kernels/bm/CMakeLists.txt
+++ b/lite/kernels/bm/CMakeLists.txt
@@ -2,16 +2,6 @@ if(NOT LITE_WITH_BM)
   return ()
 endif()
 
-add_kernel(conv_compute_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(calib_compute_bm BM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(pool_compute_bm BM basic SRCS pool_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(elementwise_compute_bm BM basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(relu_compute_bm BM basic SRCS relu_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(softmax_compute_bm BM basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(batch_norm_compute_bm BM basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(scale_compute_bm BM basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(mul_compute_bm BM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(io_copy_compute_bm BM basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps})
-
-message(STATUS "compile with lite BM kernels")
+add_kernel(graph_compute_bm BM basic SRCS graph_compute.cc DEPS ${lite_kernel_deps} )
 
+add_subdirectory(bridges)
diff --git a/lite/kernels/bm/batch_norm_compute.cc b/lite/kernels/bm/batch_norm_compute.cc
deleted file mode 100644
index a90db26f94..0000000000
--- a/lite/kernels/bm/batch_norm_compute.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/batch_norm_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void BatchNormCompute::PrepareForRun() {
-  return;
-}
-
-void BatchNormCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void BatchNormComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void BatchNormComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  batch_norm, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::BatchNormCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  batch_norm, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::BatchNormComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/bridges/CMakeLists.txt b/lite/kernels/bm/bridges/CMakeLists.txt
new file mode 100644
index 0000000000..690b38d2e1
--- /dev/null
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Bridge registry library; every converter library below depends on it.
+lite_cc_library(bm_bridge_registry SRCS registry.cc)
+
+set(bm_bridge_deps bm_bridge_registry op)
+
+lite_cc_library(bm_bridge_act_op SRCS act_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_conv_op SRCS conv_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_pool_op SRCS pool_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_softmax_op SRCS softmax_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_mul_op SRCS mul_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_scale_op SRCS scale_op.cc DEPS ${bm_bridge_deps})
+
+# CACHE INTERNAL makes the list visible to other directories' CMake files.
+set(bm_bridges
+        bm_bridge_registry
+        bm_bridge_act_op
+        bm_bridge_conv_op
+        bm_bridge_elementwise_ops
+        bm_bridge_pool_op
+        bm_bridge_softmax_op
+        bm_bridge_mul_op
+        bm_bridge_batch_norm_op
+        bm_bridge_scale_op
+        CACHE INTERNAL "bm_bridges")
+
diff --git a/lite/kernels/bm/conv_compute.h b/lite/kernels/bm/bridges/act_op.cc
similarity index 60%
rename from lite/kernels/bm/conv_compute.h
rename to lite/kernels/bm/bridges/act_op.cc
index bf5f7d7aa6..5779c39d71 100644
--- a/lite/kernels/bm/conv_compute.h
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -12,35 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#pragma once
-#include "lite/core/kernel.h"
+#include "lite/kernels/bm/bridges/registry.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {
 
-class ConvCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
- public:
-  using param_t = operators::ConvParam;
-
-  void PrepareForRun() {};
-  void Run() {};
-  virtual ~ConvCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ConvComputeInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8), DATALAYOUT(kNCHW)> {
- public:
-  using param_t = operators::ConvParam;
-
-  void PrepareForRun() {};
-  void Run() {};
-  virtual ~ConvComputeInt8() = default;
-};
+// Placeholder relu bridge: converts nothing yet and returns an empty node map.
+node_map_type ActConverter(const std::shared_ptr<lite::OpLite> op,
+                           const node_map_type& input_nodes) {
+  // No conversion is implemented yet: op and input_nodes are not read.
+  return node_map_type();
+}
 
+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+REGISTER_BM_BRIDGE(relu, paddle::lite::kernels::bm::bridges::ActConverter);
diff --git a/lite/kernels/bm/mul_compute.h b/lite/kernels/bm/bridges/batch_norm_op.cc
similarity index 56%
rename from lite/kernels/bm/mul_compute.h
rename to lite/kernels/bm/bridges/batch_norm_op.cc
index 66ce5e3488..015b5fb153 100644
--- a/lite/kernels/bm/mul_compute.h
+++ b/lite/kernels/bm/bridges/batch_norm_op.cc
@@ -12,39 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/mul_op.h"
+#include "lite/kernels/bm/bridges/registry.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {
 
-class MulCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::MulParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~MulCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class MulComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::MulParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~MulComputeInt8() = default;
-};
-    
+// Placeholder batch_norm bridge: converts nothing, returns an empty node map.
+node_map_type BatchNormConverter(const std::shared_ptr<lite::OpLite> op,
+                                 const node_map_type& input_nodes) {
+  // No conversion is implemented yet: op and input_nodes are not read.
+  return node_map_type();
+}
 
+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+REGISTER_BM_BRIDGE(batch_norm, paddle::lite::kernels::bm::bridges::BatchNormConverter);
diff --git a/lite/kernels/bm/bridges/conv_op.cc b/lite/kernels/bm/bridges/conv_op.cc
new file mode 100644
index 0000000000..c996293923
--- /dev/null
+++ b/lite/kernels/bm/bridges/conv_op.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Placeholder conv2d bridge: converts nothing yet, returns an empty node map.
+node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
+                            const node_map_type& input_nodes) {
+  // No conversion is implemented yet: op and input_nodes are not read.
+  return node_map_type();
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(conv2d, paddle::lite::kernels::bm::bridges::ConvConverter);
diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc
new file mode 100644
index 0000000000..31f0c73f20
--- /dev/null
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
+                                   const node_map_type& input_nodes) {
+  // Stub: `op` and `input_nodes` are not read yet; no output node is produced.
+  node_map_type output_nodes;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(elementwise, paddle::lite::kernels::bm::bridges::ElementwiseConverter);
diff --git a/lite/kernels/bm/bridges/mul_op.cc b/lite/kernels/bm/bridges/mul_op.cc
new file mode 100644
index 0000000000..3b85731640
--- /dev/null
+++ b/lite/kernels/bm/bridges/mul_op.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
+                           const node_map_type& input_nodes) {
+  // Stub: `op` and `input_nodes` are not read yet; no output node is produced.
+  node_map_type output_nodes;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(mul, paddle::lite::kernels::bm::bridges::MulConverter);
diff --git a/lite/kernels/bm/bridges/pool_op.cc b/lite/kernels/bm/bridges/pool_op.cc
new file mode 100644
index 0000000000..81b4583292
--- /dev/null
+++ b/lite/kernels/bm/bridges/pool_op.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> op,
+                            const node_map_type& input_nodes) {
+  // Placeholder bridge: `op` and `input_nodes` are not consulted yet, so the
+  // map of converted output nodes returned to the caller is always empty.
+  return node_map_type{};
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(pool2d, paddle::lite::kernels::bm::bridges::PoolConverter);
diff --git a/lite/kernels/bm/calib_compute.h b/lite/kernels/bm/bridges/registry.cc
similarity index 60%
rename from lite/kernels/bm/calib_compute.h
rename to lite/kernels/bm/bridges/registry.cc
index 8a5bf057c0..017cf0db43 100644
--- a/lite/kernels/bm/calib_compute.h
+++ b/lite/kernels/bm/bridges/registry.cc
@@ -12,39 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#pragma once
-#include "lite/core/kernel.h"
-#include "lite/operators/calib_op.h"
+#include "lite/kernels/bm/bridges/registry.h"
+#include <utility>
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {
 
-class CalibComputeFp32ToInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
- public:
-  using param_t = operators::CalibParam;
+Factory& Factory::Instance() {
+  static Factory bridge_factory;  // function-local static, built on first call
+  return bridge_factory;
+}
 
-  void Run() override;
+bool Factory::HasType(const std::string& op_type) const {
+  return map_.find(op_type) != map_.end();  // true iff op_type was inserted
+}
 
-  ~CalibComputeFp32ToInt8() override{};
-
- private:
-};
-
-class CalibComputeInt8ToFp32
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
- public:
-  using param_t = operators::CalibParam;
-
-  void Run() override;
-
-  ~CalibComputeInt8ToFp32() override{};
-
- private:
-};
+void Factory::Insert(const std::string& op_type, const func_type& func_name) {
+  map_.emplace(op_type, func_name);  // an existing entry is left unchanged
+}
 
+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
diff --git a/lite/kernels/bm/bridges/registry.h b/lite/kernels/bm/bridges/registry.h
new file mode 100644
index 0000000000..73101aebb2
--- /dev/null
+++ b/lite/kernels/bm/bridges/registry.h
@@ -0,0 +1,86 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+#include "lite/core/op_lite.h"
+#include "lite/utils/macros.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Maps a variable name to a pointer to its BM node.
+using node_map_type =
+    std::unordered_map<std::string, std::shared_ptr<void*>>;
+
+using func_type = std::function<node_map_type(const std::shared_ptr<OpLite>,
+                                              const node_map_type&)>;
+using cvt_map_type = std::unordered_map<std::string, func_type>;
+class Factory {  // registry mapping an op type name to its converter function
+ public:
+  Factory() = default;
+  static Factory& Instance();
+
+  bool HasType(const std::string& op_type) const;
+  void Insert(const std::string& op_type, const func_type& func_name);
+  const cvt_map_type& AllFunctions() const { return map_; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Factory);
+  cvt_map_type map_;  // op type -> converter, filled through Insert()
+};
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Some platform-independent definitions.
+#if defined(_WIN32)
+#define UNUSED
+#define __builtin_expect(EXP, C) (EXP)
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#define STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(uniq_name, msg)              \
+  struct __test_global_namespace_##uniq_name##__ {};                          \
+  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \
+                             __test_global_namespace_##uniq_name##__>::value, \
+                msg)
+
+#define REGISTER_BM_BRIDGE(op_type, cvt_func_name)                         \
+  STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(                                \
+      __reg_bm_bridge_##op_type##__,                                       \
+      "REGISTER_BM_BRIDGE must be called in global namespace only once!"); \
+  int __reg_bm_bridge_##op_type##_Insert() {                               \
+    paddle::lite::kernels::bm::bridges::Factory::Instance().Insert(        \
+        #op_type, cvt_func_name);                                          \
+    return 0;                                                              \
+  }
+
+#define USE_BM_BRIDGE(op_type)                                  \
+  extern int __reg_bm_bridge_##op_type##_Insert();              \
+  static int __reg_bm_bridge_##op_type##_Insert_return UNUSED = \
+      __reg_bm_bridge_##op_type##_Insert();
diff --git a/lite/kernels/bm/bridges/scale_op.cc b/lite/kernels/bm/bridges/scale_op.cc
new file mode 100644
index 0000000000..39c6d60218
--- /dev/null
+++ b/lite/kernels/bm/bridges/scale_op.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> op,
+                             const node_map_type& input_nodes) {
+  // Stub: `op` and `input_nodes` are not read yet; no output node is produced.
+  node_map_type output_nodes;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(scale, paddle::lite::kernels::bm::bridges::ScaleConverter);
diff --git a/lite/kernels/bm/bridges/softmax_op.cc b/lite/kernels/bm/bridges/softmax_op.cc
new file mode 100644
index 0000000000..e1f99b02ab
--- /dev/null
+++ b/lite/kernels/bm/bridges/softmax_op.cc
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> op,
+                               const node_map_type& input_nodes) {
+  // Stub: `op` and `input_nodes` are not read yet; no output node is produced.
+  node_map_type output_nodes;
+  return output_nodes;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(softmax, paddle::lite::kernels::bm::bridges::SoftmaxConverter);
diff --git a/lite/kernels/bm/calib_compute.cc b/lite/kernels/bm/calib_compute.cc
deleted file mode 100644
index 2c12fffd6c..0000000000
--- a/lite/kernels/bm/calib_compute.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/calib_compute.h"
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void CalibComputeFp32ToInt8::Run() {
-}
-
-void CalibComputeInt8ToFp32::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(calib,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
-                     fp32_to_int8)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(calib,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
-                     int8_to_fp32)
-    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .Finalize();
-REGISTER_LITE_KERNEL(calib_once,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
-                     fp32_to_int8)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(calib_once,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
-                     int8_to_fp32)
-    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .Finalize();
diff --git a/lite/kernels/bm/conv_compute.cc b/lite/kernels/bm/conv_compute.cc
deleted file mode 100644
index c6def595cc..0000000000
--- a/lite/kernels/bm/conv_compute.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/conv_compute.h"
-#include <vector>
-#include "lite/core/op_registry.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-template class ConvComputeInt8<PRECISION(kInt8)>;
-template class ConvComputeInt8<PRECISION(kFloat)>;
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-    conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kFloat),
-                                      DATALAYOUT(kNCHW))})
-    .BindInput("Bias",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindInput("Filter",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kFloat),
-                                      DATALAYOUT(kNCHW))})
-    .BindOutput("Output",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kFloat),
-                                       DATALAYOUT(kNCHW))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(
-    conv2d,
-    kBM,
-    kInt8,
-    kNCHW,
-    paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
-    int8_out)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kInt8),
-                                      DATALAYOUT(kNCHW))})
-    .BindInput("Bias",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindInput("Filter",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kInt8),
-                                      DATALAYOUT(kNCHW))})
-    .BindOutput("Output",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kFloat),
-                                       DATALAYOUT(kNCHW))})
-    .Finalize();
diff --git a/lite/kernels/bm/elementwise_compute.cc b/lite/kernels/bm/elementwise_compute.cc
deleted file mode 100644
index 0b55301de3..0000000000
--- a/lite/kernels/bm/elementwise_compute.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/elementwise_compute.h"
-#include <string>
-#include <vector>
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void ElementwiseAddCompute::Run() {
-}
-
-template <PrecisionType Ptype_out>
-void ElementwiseAddComputeInt8<Ptype_out>::Run() {
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(elementwise_add,
-                     kBM,
-                     kFloat,
-                     kNCHW,
-                     paddle::lite::kernels::bm::ElementwiseAddCompute,
-                     def)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(elementwise_add,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::ElementwiseAddComputeInt8<PRECISION(kInt8)>,
-                     def)
-.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-.Finalize();
diff --git a/lite/kernels/bm/batch_norm_compute.h b/lite/kernels/bm/graph_compute.cc
similarity index 53%
rename from lite/kernels/bm/batch_norm_compute.h
rename to lite/kernels/bm/graph_compute.cc
index 691e0579a8..5b3d36c3da 100644
--- a/lite/kernels/bm/batch_norm_compute.h
+++ b/lite/kernels/bm/graph_compute.cc
@@ -12,39 +12,37 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/batch_norm_op.h"
+#include "lite/kernels/bm/graph_compute.h"
+#include <sys/time.h>
+#include <time.h>
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
 
-class BatchNormCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::BatchNormParam;
+// No-op: the body is empty, so no preparation work is done before running.
+void GraphCompute::PrepareForRun() {}
 
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~BatchNormCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class BatchNormComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::BatchNormParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~BatchNormComputeInt8() = default;
-};
-    
+// No-op: the body is empty, so invoking this kernel currently does nothing.
+void GraphCompute::Run() {}
 
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+REGISTER_LITE_KERNEL(graph_op,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::GraphCompute,
+                     def)
+    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
diff --git a/lite/kernels/bm/elementwise_compute.h b/lite/kernels/bm/graph_compute.h
similarity index 69%
rename from lite/kernels/bm/elementwise_compute.h
rename to lite/kernels/bm/graph_compute.h
index c24050fb34..31629c0c31 100644
--- a/lite/kernels/bm/elementwise_compute.h
+++ b/lite/kernels/bm/graph_compute.h
@@ -13,29 +13,28 @@
 // limitations under the License.
 
 #pragma once
-#include <algorithm>
+
+#include <memory>
+#include <string>
+#include <vector>
 #include "lite/core/kernel.h"
 #include "lite/core/op_registry.h"
+#include "lite/core/types.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
 
-class ElementwiseAddCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+class GraphCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
  public:
-   void Run() override;
+  using param_t = operators::GraphParam;
 
-   virtual ~ElementwiseAddCompute() = default;
-};
+  void PrepareForRun() override;
+
+  void Run() override;
 
-template <PrecisionType Ptype_out>
-class ElementwiseAddComputeInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    void Run() override;
-    virtual ~ElementwiseAddComputeInt8() = default;
+  virtual ~GraphCompute() = default;
 };
 
 }  // namespace bm
diff --git a/lite/kernels/bm/io_copy_compute.cc b/lite/kernels/bm/io_copy_compute.cc
deleted file mode 100644
index 4e991d97f7..0000000000
--- a/lite/kernels/bm/io_copy_compute.cc
+++ /dev/null
@@ -1,167 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/backends/bm/target_wrapper.h"
-#include "lite/core/kernel.h"
-#include "lite/core/op_registry.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-using TargetW = TargetWrapper<TARGET(kBM)>;
-
-// Host to BM memory.
-void CopyFromHostSync(void* target, const void* source, size_t size) {
-  TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
-}
-
-void CopyFromHostAsync(void* target,
-                       const void* source,
-                       size_t size,
-                       TargetW::stream_t stream) {
-  TargetW::MemcpyAsync(target, source, size, IoDirection::HtoD, stream);
-}
-
-// Host to Host memory.
-void CopyToHostSync(void* target, const void* source, size_t size) {
-  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
-}
-
-/*
- * This kernel copies a tensor from host to BM space.
- */
-class IoCopyHostToBMCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
- public:
-  void Run() override {
-    auto& param = Param<operators::IoCopyParam>();
-    CHECK(param.x->target() == TARGET(kHost) ||
-          param.x->target() == TARGET(kX86));
-    auto mem_size = param.x->memory_size();
-    VLOG(4) << "copy size " << mem_size;
-    auto* data = param.y->mutable_data(TARGET(kBM), mem_size);
-    CopyFromHostSync(data, param.x->raw_data(), mem_size);
-  }
-
-  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
-    std::unique_ptr<type_infer_handler_t> res(new type_infer_handler_t);
-    *res = [](const std::map<std::string, const Type*>& inputs,
-              const std::string& out) -> const Type* {
-      CHECK(!inputs.empty());
-      auto* type = inputs.at("Input");
-      CHECK(type->target() == TARGET(kHost));
-
-      auto out_place = type->place();
-      out_place.target = TARGET(kBM);
-      auto* out_type = Type::Get(type->id(),
-                                 out_place.target,
-                                 out_place.precision,
-                                 out_place.layout,
-                                 out_place.device);
-      return out_type;
-    };
-    return res;
-  }
-
-  std::string doc() const override { return "Copy IO from HOST to BM"; }
-};
-
-/*
- * This kernel copies a tensor from BM to host space.
- */
-class IoCopyBMToHostCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
- public:
-  void Run() override {
-    auto& param = Param<operators::IoCopyParam>();
-    CHECK(param.x->target() == TARGET(kBM));
-    auto mem_size = param.x->memory_size();
-    VLOG(4) << "io copy bm to host " << mem_size;
-    auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
-    CopyToHostSync(data, param.x->raw_data(), mem_size);
-  }
-
-  std::string doc() const override { return "Copy IO from BM to HOST"; }
-};
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(io_copy,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
-                     host_to_device)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kHost),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
-                     device_to_host)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kHost),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy_once,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
-                     host_to_device)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kHost),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy_once,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
-                     device_to_host)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kHost),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
diff --git a/lite/kernels/bm/mul_compute.cc b/lite/kernels/bm/mul_compute.cc
deleted file mode 100644
index 054726ea94..0000000000
--- a/lite/kernels/bm/mul_compute.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/mul_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void MulCompute::PrepareForRun() {
-  return;
-}
-
-void MulCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void MulComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void MulComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  mul, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::MulCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  mul, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::MulComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/pool_compute.cc b/lite/kernels/bm/pool_compute.cc
deleted file mode 100644
index 39a6b99727..0000000000
--- a/lite/kernels/bm/pool_compute.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/pool_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void PoolCompute::PrepareForRun() {
-  return;
-}
-
-void PoolCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void PoolComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void PoolComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  pool2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::PoolCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  pool2d, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::PoolComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/pool_compute.h b/lite/kernels/bm/pool_compute.h
deleted file mode 100644
index 10f9d00546..0000000000
--- a/lite/kernels/bm/pool_compute.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/pool_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class PoolCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::PoolParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~PoolCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class PoolComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::PoolParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~PoolComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
diff --git a/lite/kernels/bm/relu_compute.cc b/lite/kernels/bm/relu_compute.cc
deleted file mode 100644
index d7bffa50ab..0000000000
--- a/lite/kernels/bm/relu_compute.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/relu_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void ReluCompute::PrepareForRun() {
-  return;
-}
-
-void ReluCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void ReluComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void ReluComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  relu, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ReluCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  relu, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::ReluComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/relu_compute.h b/lite/kernels/bm/relu_compute.h
deleted file mode 100644
index 701a2dc2c4..0000000000
--- a/lite/kernels/bm/relu_compute.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/relu_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class ReluCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::ActivationParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~ReluCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ReluComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::ActivationParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~ReluComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
diff --git a/lite/kernels/bm/scale_compute.cc b/lite/kernels/bm/scale_compute.cc
deleted file mode 100644
index 3e3bff5e59..0000000000
--- a/lite/kernels/bm/scale_compute.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/scale_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void ScaleCompute::PrepareForRun() {
-  return;
-}
-
-void ScaleCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void ScaleComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void ScaleComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  scale, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ScaleCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  scale, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::ScaleComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/scale_compute.h b/lite/kernels/bm/scale_compute.h
deleted file mode 100644
index 099bc832b8..0000000000
--- a/lite/kernels/bm/scale_compute.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/scale_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class ScaleCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::ScaleParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~ScaleCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ScaleComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::ScaleParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~ScaleComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
diff --git a/lite/kernels/bm/softmax_compute.cc b/lite/kernels/bm/softmax_compute.cc
deleted file mode 100644
index d18cab6721..0000000000
--- a/lite/kernels/bm/softmax_compute.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/softmax_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void SoftmaxCompute::PrepareForRun() {
-  return;
-}
-
-void SoftmaxCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void SoftmaxComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void SoftmaxComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  softmax, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::SoftmaxCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  softmax, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::SoftmaxComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
diff --git a/lite/kernels/bm/softmax_compute.h b/lite/kernels/bm/softmax_compute.h
deleted file mode 100644
index 2cd52722d0..0000000000
--- a/lite/kernels/bm/softmax_compute.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/softmax_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class SoftmaxCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::SoftmaxParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~SoftmaxCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class SoftmaxComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::SoftmaxParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~SoftmaxComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-- 
GitLab