* change to bridge way

* fix code_style test=develop

* change to bridge way
* fix code_style test=develop
662e4d7c · cen.li · 87271863 · 662e4d7c · 662e4d7c · 662e4d7c
37 changed file
--- a/lite/api/test_resnet50_lite_bm.cc
+++ b/lite/api/test_resnet50_lite_bm.cc
@@ -35,7 +35,7 @@ void TestModel(const std::vector<Place>& valid_places) {
  //DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
  lite::Predictor predictor;
  predictor.Build(FLAGS_model_dir, "", "", valid_places);
-#if 0
+
  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
@@ -53,7 +53,6 @@ void TestModel(const std::vector<Place>& valid_places) {
      fs >> data[i];
    }
  }
-
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor.Run();
  }
@@ -102,7 +101,6 @@ void TestModel(const std::vector<Place>& valid_places) {
    }
  }
  LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
-#endif
 }

 TEST(ResNet50, test_bm) {

--- a/lite/backends/bm/CMakeLists.txt
+++ b/lite/backends/bm/CMakeLists.txt
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
    return()
 endif()

-lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc DEPS ${bm_runtime_libs})
+lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc bm_context.cc DEPS ${bm_runtime_libs})
--- a/lite/backends/bm/bm_context.cc
+++ b/lite/backends/bm/bm_context.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/core/context.h"
+#include "bmcompiler_if.h"
+
+namespace paddle {
+namespace lite {
+
+static const char* CHIP_NAME = "BM1684";
+
+// Creates the BM compiler handle for CHIP_NAME and stores it in
+// compiler_handle_. Aborts through CHECK when the compiler returns a null
+// handle, reporting which chip name was requested.
+void BMContext::InitOnce() {
+  compiler_handle_ = create_bmcompiler(CHIP_NAME);
+  CHECK(compiler_handle_ != nullptr)
+      << "Failed to create BM compiler for chip " << CHIP_NAME;
+}
+
+}  // namespace lite
+}  // namespace paddle
--- a/lite/backends/bm/target_wrapper.cc
+++ b/lite/backends/bm/target_wrapper.cc
@@ -14,13 +14,14 @@
 #include <map>
 #include "lite/backends/bm/target_wrapper.h"
 #include "bmlib_runtime.h"
+#include "bmcompiler_if.h"

 namespace paddle {
 namespace lite {

 static int g_current_device_id = 0;
 static std::map<int, bm_handle_t> g_bm_handles;
-    
+
 size_t TargetWrapperBM::num_devices() {
  int count = 0;
  bm_dev_getcount(&count);
@@ -32,7 +33,8 @@ void TargetWrapperBM::SetDevice(int id) {

  if (g_bm_handles.find(id) == g_bm_handles.end()) {
    bm_handle_t bm_handle;
-    bm_dev_request(&bm_handle, id);
+    bm_status_t ret = bm_dev_request(&bm_handle, id);
+    CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: " << (int)ret;
    g_bm_handles.insert(std::pair<int, bm_handle_t>(id, bm_handle));
  }
  return;
@@ -41,6 +43,10 @@ void TargetWrapperBM::SetDevice(int id) {
 void* TargetWrapperBM::Malloc(size_t size) {
  void* ptr{};

+  if (g_bm_handles.find(g_current_device_id) == g_bm_handles.end()) {
+      SetDevice(g_current_device_id);
+  } 
+
  bm_handle_t bm_handle = g_bm_handles.at(g_current_device_id);
  bm_device_mem_t* p_mem = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t));
  bm_malloc_device_byte(bm_handle, p_mem, size);

--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -96,11 +96,17 @@ class Context<TargetType::kBM> {
  Context() {}
  explicit Context(const BMContext& ctx);
  // NOTE: InitOnce should only be used by ContextScheduler
-  void InitOnce() {}
+  void InitOnce();
  void CopySharedTo(BMContext* ctx) {}

  std::string name() const { return "BMContext"; }
-  };
+  void* compiler_handle() { 
+    return compiler_handle_;
+  }
+
+ private:
+  void* compiler_handle_{nullptr};
+};
 #endif

 #ifdef LITE_WITH_XPU
@@ -340,7 +346,6 @@ class ContextScheduler {
  std::unique_ptr<KernelContext> NewContext(TargetType target) {
    std::unique_ptr<KernelContext> ctx(new KernelContext);

-    LOG(INFO) << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa " << int(target) << " " << int(TARGET(kBM));
    switch (target) {
      case TARGET(kHost):
        kernel_contexts_[TargetType::kHost].As<HostContext>().CopySharedTo(

--- a/lite/core/memory.cc
+++ b/lite/core/memory.cc
@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
      data = TargetWrapper<TARGET(kFPGA)>::Malloc(size);
      break;
 #endif  // LITE_WITH_OPENCL
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      data = TargetWrapper<TARGET(kBM)>::Malloc(size);
+      break;
+#endif      
    default:
      LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
  }
@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
      TargetWrapper<TARGET(kFPGA)>::Free(data);
      break;
 #endif  // LITE_WITH_CUDA
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      TargetWrapper<TARGET(kBM)>::Free(data);
+      break;
+#endif
    default:
      LOG(FATAL) << "Unknown type";
  }
@@ -95,6 +105,12 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
          dst, src, size, IoDirection::DtoD);
      break;
 #endif
+#ifdef LITE_WITH_BM
+    case TargetType::kBM:
+      TargetWrapper<TARGET(kBM)>::MemcpySync(
+          dst, src, size, IoDirection::DtoD);
+      break;
+#endif 
 #ifdef LITE_WITH_OPENCL
    case TargetType::kOpenCL:
      TargetWrapperCL::MemcpySync(dst, src, size, IoDirection::DtoD);

--- a/lite/core/memory.h
+++ b/lite/core/memory.h
@@ -25,6 +25,10 @@
 #include "lite/backends/cuda/target_wrapper.h"
 #endif  // LITE_WITH_CUDA

+#ifdef LITE_WITH_BM
+#include "lite/backends/bm/target_wrapper.h" 
+#endif  // LITE_WITH_BM
+
 namespace paddle {
 namespace lite {

@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
    case TARGET(kFPGA):
      TargetWrapper<TARGET(kFPGA)>::MemcpySync(dst, src, size, dir);
      break;
+#endif
+#ifdef LITE_WITH_BM
+    case TARGET(kBM):
+      TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
+      break;
 #endif
  }
 }

--- a/lite/core/mir/static_kernel_pick_pass.cc
+++ b/lite/core/mir/static_kernel_pick_pass.cc
@@ -33,6 +33,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
  kernel_pick_factors_.ConsiderTarget();
  kernel_pick_factors_.ConsiderPrecision();
  kernel_pick_factors_.ConsiderDataLayout();
+
  CHECK(kernel_pick_factors_.any_factor_considered())
      << "kernel_pick_factors should be specified first";
  CHECK(graph) << "graph not valid";
@@ -114,7 +115,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
        bool all_output_type_match = true;
        auto expect_output_type =
            out_type_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
-
        for (auto& arg_name : output_arguments) {
          const Type* out_arg_ty =
              candidate.second->GetOutputDeclType(arg_name);

--- a/lite/core/mir/subgraph/generate_bm_program_pass.cc
+++ b/lite/core/mir/subgraph/generate_bm_program_pass.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/mir/subgraph/generate_bm_program_pass.h"
+#include <memory>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+#include "lite/core/mir/graph_visualize_pass.h"
+#include "lite/core/mir/pass_registry.h"
+#include "lite/core/mir/pattern_matcher.h"
+
+namespace paddle {
+namespace lite {
+namespace mir {
+namespace subgraph {
+
+// Converts a graph argument node into an IR operator.
+//
+// var_node: must be an argument node (checked) whose name resolves, in
+//           `scope`, to a variable holding a lite::Tensor.
+// scope:    scope used to look the variable up; must not be null.
+//
+// Returns a ge::op::Const carrying the tensor converted by
+// lite::npu::CvtTensor when the argument is a weight; otherwise returns a
+// ge::op::Data whose input desc is an NCHW float tensor with the variable's
+// dims, which must be 4-D (checked).
+// NOTE(review): the "[NPU]" log tag and the ge:: / lite::npu helpers look
+// carried over from the NPU pass -- confirm they are intended here.
+std::shared_ptr<ge::Operator> GenerateBMProgramPass::CvtVarNode(
+    lite::mir::Node* var_node, const Scope* scope) {
+  CHECK(var_node);
+  CHECK(scope);
+  CHECK(var_node->IsArg());
+  const auto& arg = var_node->AsArg();
+  VLOG(4) << "Convert var node " << arg.name;
+
+  auto* var = scope->FindVar(arg.name);
+  CHECK(var);
+  auto* tensor = var->GetMutable<lite::Tensor>();
+  CHECK(tensor);
+  auto dims = tensor->dims();
+
+  // Weights become constant operators that embed the tensor data.
+  if (arg.is_weight) {
+    auto wgt = std::make_shared<ge::op::Const>(arg.name);
+    LOG(INFO) << " Convert const var node " << arg.name;
+    VLOG(4) << dims;
+    wgt->set_attr_value(lite::npu::CvtTensor(tensor));
+    return wgt;
+  }
+
+  // Everything else becomes a data (placeholder) operator.
+  CHECK_EQ(dims.size(), 4);
+  LOG(INFO) << "[NPU] Convert data var node " << arg.name;
+  LOG(INFO) << dims;
+  // TODO(xxx): support more types and dims size
+  ge::TensorDesc desc(ge::Shape(dims.Vectorize()),
+                      ge::Format::FORMAT_NCHW,
+                      ge::DataType::DT_FLOAT);
+
+  //   auto size = desc.GetShape().GetShapeSize();
+  //  ge::TensorUtils::SetSize(desc, size*sizeof(float));
+  //  ge::TensorUtils::SetRealDimCnt(desc, 4);
+  auto data = std::make_shared<ge::op::Data>(arg.name);
+  data->update_input_desc_x(desc);
+  return data;
+}
+
+// Converts every op node in nodes2cvt, in the order given, by calling the
+// bridge function registered for its op type.
+//
+// nodes2cvt:      op (statement) nodes to convert.
+// converted_vars: in/out map from variable name to converted node. A
+//                 non-weight input that is not in the map yet is converted
+//                 with CvtVarNode and added; the nodes returned by each
+//                 bridge function are merged in as well, so later ops can
+//                 look up their inputs.
+//
+// The definition is qualified with GenerateBMProgramPass to match the
+// declaration in generate_bm_program_pass.h, the only pass header this file
+// includes.
+void GenerateBMProgramPass::CvtAllOpNodes(
+    const std::vector<Node*>& nodes2cvt,
+    lite::kernels::npu::bridges::node_map_type* converted_vars) {
+  const auto& bridges = lite::kernels::npu::bridges::Factory::Instance();
+  const auto& cvtfunc_map = bridges.AllFunctions();
+  for (auto& node : nodes2cvt) {
+    lite::kernels::npu::bridges::node_map_type node_inputs;
+    auto& stmt = node->AsStmt();
+    for (auto& var_node : node->inlinks) {
+      auto& arg = var_node->AsArg();
+      // Weights are handled inside the bridge function, so skip them here.
+      if (arg.is_weight) {
+        continue;
+      }
+      const auto& var_name = arg.name;
+      if (!converted_vars->count(var_name)) {
+        converted_vars->insert(
+            std::make_pair(var_name, CvtVarNode(var_node, stmt.op()->scope())));
+      }
+      node_inputs.insert(*converted_vars->find(var_name));
+    }
+    // at() throws when no bridge is registered for this op type
+    // (std::out_of_range for a standard map).
+    auto node_outputs = cvtfunc_map.at(stmt.op_type())(stmt.op(), node_inputs);
+    converted_vars->insert(node_outputs.begin(), node_outputs.end());
+  }
+}
+
+// Builds the IR graph for one subgraph and compiles it into a model.
+//
+// op_nodes:      op nodes of the subgraph; must not be empty.
+// in_data_vars:  data input var nodes of the subgraph.
+// out_data_vars: data output var nodes of the subgraph.
+// sub_id:        subgraph index, used to name the weight variable.
+//
+// The ops are converted in topological order, then the converted input and
+// output operators are passed to lite::npu::BuildModel. The model data is
+// stored in a persistable kInt8 tensor named "graph<sub_id>_weights", created
+// in the scope of one of the subgraph's ops.
+//
+// Returns the name of that weight variable. Throws std::runtime_error when
+// the model cannot be built.
+//
+// The definition is named GenerateBMProgramPass::BuildGraph to match the
+// declaration in generate_bm_program_pass.h.
+// NOTE(review): the "[NPU]" log tags and lite::npu::BuildModel look carried
+// over from the NPU pass -- confirm they are intended here.
+std::string GenerateBMProgramPass::BuildGraph(
+    const std::unordered_set<Node*>& op_nodes,
+    const std::unordered_set<Node*>& in_data_vars,
+    const std::unordered_set<Node*>& out_data_vars,
+    int sub_id) {
+  // The weight tensor is created through *op_nodes.begin(), which is
+  // undefined for an empty set.
+  CHECK(!op_nodes.empty()) << "Subgraph " << sub_id << " has no op nodes";
+
+  auto ordered_nodes = GetTopologicalOrder(op_nodes);
+  lite::kernels::npu::bridges::node_map_type converted_vars;
+  CvtAllOpNodes(ordered_nodes, &converted_vars);
+
+  std::vector<ge::Operator> inputs;
+  inputs.reserve(in_data_vars.size());
+  for (auto* var_node : in_data_vars) {
+    inputs.push_back(*converted_vars.at(var_node->AsArg().name));
+  }
+  std::vector<ge::Operator> outputs;
+  outputs.reserve(out_data_vars.size());
+  for (auto* var_node : out_data_vars) {
+    outputs.push_back(*converted_vars.at(var_node->AsArg().name));
+  }
+
+  std::string weight_var_name = "graph" + std::to_string(sub_id) + "_weights";
+  auto any_op = (*op_nodes.begin())->AsStmt().op();
+  auto weight = any_op->scope()->Var(weight_var_name)->GetMutable<Tensor>();
+  weight->set_persistable(true);
+  weight->set_precision(PRECISION(kInt8));
+  // Compile the IR graph into a model and store the model data in the weight
+  // tensor with persistable=true, so that the model parser can recognize it
+  // and save it to the param files.
+  if (!lite::npu::BuildModel(inputs, outputs, weight)) {
+    LOG(WARNING) << "[NPU] Build NPU graph failed (subgraph=" << sub_id << ")";
+    throw std::runtime_error("Build NPU graph failed.");
+  }
+  LOG(INFO) << "[NPU] Build NPU graph success (subgraph=" << sub_id << ")";
+  return weight_var_name;
+}
+
+// Placeholder for turning one group of op nodes into a single graph op.
+//
+// The whole body is compiled out with `#if 0`, so the function currently does
+// nothing with graph, op_nodes or sub_id.
+// NOTE(review): the disabled code appears to outline the intended flow
+// (collect the subgraph's input/output vars, build the graph, insert a new
+// node, remove the replaced nodes) -- confirm before re-enabling it.
+void GenerateBMProgramPass::GenSubgraph(
+    const std::unique_ptr<SSAGraph>& graph,
+    const std::unordered_set<Node*>& op_nodes,
+    int sub_id) {
+#if 0
+  std::unordered_set<Node*> in_data_vars;
+  std::unordered_set<Node*> in_wgt_vars;
+  std::unordered_set<Node*> out_data_vars;
+  std::unordered_set<Node*> out_unused_vars;
+  FindInputOutputVars(
+      op_nodes, &in_data_vars, &in_wgt_vars, &out_data_vars, &out_unused_vars);
+
+  auto weight_var_name =
+      BuildNPUGraph(op_nodes, in_data_vars, out_data_vars, sub_id);
+
+  auto any_op = (*op_nodes.begin())->AsStmt().op();
+  InsertNewNode(graph,
+                weight_var_name,
+                any_op->scope(),
+                any_op->valid_places(),
+                in_data_vars,
+                in_wgt_vars,
+                out_data_vars,
+                out_unused_vars);
+
+  auto nodes2rm = GetNode2rm(
+      op_nodes, {in_data_vars, in_wgt_vars, out_data_vars, out_unused_vars});
+
+  GraphSafeRemoveNodes(graph.get(), nodes2rm);
+#endif
+}
+
+// Pass entry point. Currently a no-op: the graph is left untouched.
+void GenerateBMProgramPass::Apply(const std::unique_ptr<SSAGraph>& graph) {}
+
+// Wraps the collected instructions in a newly allocated RuntimeProgram.
+// insts_ is moved into the program, so it is left in a moved-from state.
+std::unique_ptr<RuntimeProgram> GenerateBMProgramPass::GenProgram() {
+  return std::unique_ptr<RuntimeProgram>(
+      new RuntimeProgram(std::move(insts_)));
+}
+
+}  // namespace subgraph
+}  // namespace mir
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_MIR_PASS(generate_bm_program_pass,
+                  paddle::lite::mir::subgraph::GenerateBMProgramPass)
+    .BindTargets({TARGET(kBM)});
--- a/lite/kernels/bm/relu_compute.cc
+++ b/lite/kernels/bm/relu_compute.cc
@@ -12,48 +12,49 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "lite/kernels/bm/relu_compute.h"
+#pragma once
+
+#include <map>
+#include <memory>
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 #include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
+#include "lite/core/context.h"
+#include "lite/core/mir/pass.h"
+#include "lite/core/mir/subgraph/subgraph_program_pass.h"

 namespace paddle {
 namespace lite {
-namespace kernels {
-namespace bm {
+namespace mir {
+namespace subgraph {
+
+class GenerateBMProgramPass : public SubgraphProgramPass {
+ public:
+  using key2nodes_t = std::map<std::string, Node*>;

-void ReluCompute::PrepareForRun() {
-  return;
-}
+  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
+  std::unique_ptr<RuntimeProgram> GenProgram();  // moves insts_ into the result

-void ReluCompute::Run() {
-  return;
-}
+ protected:
+  // nodes2cvt: op nodes to convert.
+  // cvted_vars: in/out map that receives the converted var nodes.
+  void CvtAllOpNodes(const std::vector<Node*>& nodes2cvt,
+                     lite::kernels::npu::bridges::node_map_type* cvted_vars);

-template <PrecisionType Ptype_out>
-void ReluComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
+  std::shared_ptr<ge::Operator> CvtVarNode(lite::mir::Node* var_node,
+                                           const Scope* scope);

-template <PrecisionType Ptype_out> 
-void ReluComputeInt8<Ptype_out>::Run() {
-  return;
-}
+  std::string BuildGraph(const std::unordered_set<Node*>& op_nodes,
+                            const std::unordered_set<Node*>& in_data_vars,
+                            const std::unordered_set<Node*>& out_data_vars,
+                            int sub_id);

-}  // namespace bm
-}  // namespace kernels
+ private:
+  std::vector<Instruction> insts_;  // instructions handed over by GenProgram()
+};
+
+}  // namespace subgraph
+}  // namespace mir
 }  // namespace lite
 }  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  relu, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ReluCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  relu, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::ReluComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/CMakeLists.txt
+++ b/lite/kernels/bm/CMakeLists.txt
@@ -2,16 +2,6 @@ if(NOT LITE_WITH_BM)
  return ()
 endif()

-add_kernel(conv_compute_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(calib_compute_bm BM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(pool_compute_bm BM basic SRCS pool_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(elementwise_compute_bm BM basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(relu_compute_bm BM basic SRCS relu_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(softmax_compute_bm BM basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(batch_norm_compute_bm BM basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(scale_compute_bm BM basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(mul_compute_bm BM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps})
-add_kernel(io_copy_compute_bm BM basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps})
-
-message(STATUS "compile with lite BM kernels")
+add_kernel(graph_compute_bm BM basic SRCS graph_compute.cc DEPS ${lite_kernel_deps} )

+add_subdirectory(bridges)
--- a/lite/kernels/bm/batch_norm_compute.cc
+++ b/lite/kernels/bm/batch_norm_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/batch_norm_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void BatchNormCompute::PrepareForRun() {
-  return;
-}
-
-void BatchNormCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void BatchNormComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void BatchNormComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  batch_norm, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::BatchNormCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  batch_norm, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::BatchNormComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
+lite_cc_library(bm_bridge_registry SRCS registry.cc)
+
+set(bm_bridge_deps bm_bridge_registry op)
+
+lite_cc_library(bm_bridge_act_op SRCS act_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_conv_op SRCS conv_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_pool_op SRCS pool_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_softmax_op SRCS softmax_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_mul_op SRCS mul_op.cc DEPS ${bm_bridge_deps})
+lite_cc_library(bm_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS ${bm_bridge_deps})
+
+set(bm_bridges
+        bm_bridge_registry
+        bm_bridge_act_op
+        bm_bridge_conv_op
+        bm_bridge_elementwise_ops
+        bm_bridge_pool_op
+        bm_bridge_softmax_op
+        bm_bridge_mul_op
+        bm_bridge_batch_norm_op
+        CACHE INTERNAL "bm_bridges")
+
--- a/lite/kernels/bm/conv_compute.h
+++ b/lite/kernels/bm/conv_compute.h
@@ -12,35 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#pragma once
-#include "lite/core/kernel.h"
+#include "lite/kernels/bm/bridges/registry.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {

-class ConvCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
- public:
-  using param_t = operators::ConvParam;
-
-  void PrepareForRun() {};
-  void Run() {};
-  virtual ~ConvCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ConvComputeInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8), DATALAYOUT(kNCHW)> {
- public:
-  using param_t = operators::ConvParam;
-
-  void PrepareForRun() {};
-  void Run() {};
-  virtual ~ConvComputeInt8() = default;
-};
+node_map_type ActConverter(const std::shared_ptr<lite::OpLite> op,
+                           const node_map_type& input_nodes) {
+  // Placeholder bridge: nothing is converted yet, so the result is empty.
+  node_map_type converted;
+  return converted;
+}

+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+REGISTER_BM_BRIDGE(relu, paddle::lite::kernels::bm::bridges::ActConverter);
--- a/lite/kernels/bm/mul_compute.h
+++ b/lite/kernels/bm/mul_compute.h
@@ -12,39 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/mul_op.h"
+#include "lite/kernels/bm/bridges/registry.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {

-class MulCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::MulParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~MulCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class MulComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::MulParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~MulComputeInt8() = default;
-};
-    
+node_map_type BatchNormConverter(const std::shared_ptr<lite::OpLite> op,
+                                 const node_map_type& input_nodes) {
+  // Placeholder bridge: nothing is converted yet, so the result is empty.
+  node_map_type converted;
+  return converted;
+}

+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+REGISTER_BM_BRIDGE(batch_norm, paddle::lite::kernels::bm::bridges::BatchNormConverter);
--- a/lite/kernels/bm/bridges/conv_op.cc
+++ b/lite/kernels/bm/bridges/conv_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge function registered below for conv2d.
+// Placeholder: op and input_nodes are not read, and the returned map of
+// converted nodes is empty.
+node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
+                            const node_map_type& input_nodes) {
+  node_map_type converted;
+  return converted;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(conv2d, paddle::lite::kernels::bm::bridges::ConvConverter);
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge function registered below for elementwise.
+// Placeholder: op and input_nodes are not read, and the returned map of
+// converted nodes is empty.
+node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
+                                   const node_map_type& input_nodes) {
+  node_map_type converted;
+  return converted;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(elementwise, paddle::lite::kernels::bm::bridges::ElementwiseConverter);
--- a/lite/kernels/bm/bridges/mul_op.cc
+++ b/lite/kernels/bm/bridges/mul_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge function registered below for mul.
+// Placeholder: op and input_nodes are not read, and the returned map of
+// converted nodes is empty.
+node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
+                           const node_map_type& input_nodes) {
+  node_map_type converted;
+  return converted;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(mul, paddle::lite::kernels::bm::bridges::MulConverter);
--- a/lite/kernels/bm/bridges/pool_op.cc
+++ b/lite/kernels/bm/bridges/pool_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge function registered below for pool2d.
+// Placeholder: op and input_nodes are not read, and the returned map of
+// converted nodes is empty.
+node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> op,
+                            const node_map_type& input_nodes) {
+  node_map_type converted;
+  return converted;
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_BM_BRIDGE(pool2d, paddle::lite::kernels::bm::bridges::PoolConverter);
--- a/lite/kernels/bm/calib_compute.h
+++ b/lite/kernels/bm/calib_compute.h
@@ -12,39 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#pragma once
-#include "lite/core/kernel.h"
-#include "lite/operators/calib_op.h"
+#include "lite/kernels/bm/bridges/registry.h"
+#include <utility>

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {
+namespace bridges {

-class CalibComputeFp32ToInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
- public:
-  using param_t = operators::CalibParam;
+// Returns the shared bridge factory. The object is a function-local
+// static, so it is constructed the first time this function is called.
+Factory& Factory::Instance() {
+  static Factory factory_singleton;
+  return factory_singleton;
+}

-  void Run() override;
+// Reports whether a converter has been inserted under `op_type`.
+bool Factory::HasType(const std::string& op_type) const {
+  return map_.find(op_type) != map_.end();
+}

-  ~CalibComputeFp32ToInt8() override{};
-
- private:
-};
-
-class CalibComputeInt8ToFp32
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
- public:
-  using param_t = operators::CalibParam;
-
-  void Run() override;
-
-  ~CalibComputeInt8ToFp32() override{};
-
- private:
-};
+// Associates `op_type` with the converter `func_name`. If `op_type` is
+// already present, the existing entry is kept and this call has no effect.
+void Factory::Insert(const std::string& op_type, const func_type& func_name) {
+  map_.emplace(op_type, func_name);
+}

+}  // namespace bridges
 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite

--- a/lite/kernels/bm/bridges/registry.h
+++ b/lite/kernels/bm/bridges/registry.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "lite/core/op_lite.h"
+#include "lite/utils/macros.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Maps a variable name to a handle for its BM node.
+// NOTE(review): the mapped type is std::shared_ptr<void*> (a shared pointer
+// to a void*), not std::shared_ptr<void> -- confirm this is intended.
+using node_map_type =
+    std::unordered_map<std::string, std::shared_ptr<void*>>;
+
+// Signature of a bridge converter: it receives an op and a node map and
+// returns a node map.
+// NOTE(review): presumably the argument map holds the op's input nodes and
+// the result its output nodes; confirm against the converters.
+using func_type = std::function<node_map_type(const std::shared_ptr<OpLite>,
+                                              const node_map_type&)>;
+// Maps an op type name to its converter function.
+using cvt_map_type = std::unordered_map<std::string, func_type>;
+// Registry of BM bridge converters, keyed by op type name.
+class Factory {
+ public:
+  Factory() = default;
+
+  // Accessor for the shared Factory object (defined out of line).
+  static Factory& Instance();
+
+  // Stores `func_name` as the converter for `op_type`.
+  void Insert(const std::string& op_type, const func_type& func_name);
+  // Whether a converter is stored for `op_type`.
+  bool HasType(const std::string& op_type) const;
+  // Read-only access to every stored (op type, converter) pair.
+  const cvt_map_type& AllFunctions() const { return map_; }
+
+ private:
+  cvt_map_type map_;
+  DISALLOW_COPY_AND_ASSIGN(Factory);
+};
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Platform-specific definitions. UNUSED is consumed by USE_BM_BRIDGE below.
+#if defined(_WIN32)
+// With _WIN32 defined, UNUSED expands to nothing and
+// __builtin_expect(EXP, C) evaluates to EXP alone (C is dropped).
+#define UNUSED
+#define __builtin_expect(EXP, C) (EXP)
+#else
+// Otherwise UNUSED tags a declaration with __attribute__((unused)).
+#define UNUSED __attribute__((unused))
+#endif
+
+// Compile-time guard for macros that must be expanded at global namespace
+// scope: declares a struct named after `uniq_name` and asserts (with `msg`)
+// that its unqualified and `::`-qualified names denote the same type.
+// NOTE(review): uses std::is_same, but this header does not include
+// <type_traits> itself -- presumably it arrives transitively; confirm.
+#define STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(uniq_name, msg)              \
+  struct __test_global_namespace_##uniq_name##__ {};                          \
+  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \
+                             __test_global_namespace_##uniq_name##__>::value, \
+                msg)
+
+// Defines `int __reg_bm_bridge_<op_type>_Insert()`, which inserts
+// `cvt_func_name` into the bridge Factory under the key "<op_type>" and
+// returns 0. Nothing is inserted until that function is called (see
+// USE_BM_BRIDGE). Must be expanded at global namespace scope.
+#define REGISTER_BM_BRIDGE(op_type, cvt_func_name)                         \
+  STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(                                 \
+      __reg_bm_bridge_##op_type##__,                                       \
+      "REGISTER_BM_BRIDGE must be called in global namespace only once!"); \
+  int __reg_bm_bridge_##op_type##_Insert() {                               \
+    paddle::lite::kernels::bm::bridges::Factory::Instance().Insert(        \
+        #op_type, cvt_func_name);                                           \
+    return 0;                                                               \
+  }
+
+// Declares the `__reg_bm_bridge_<op_type>_Insert()` function produced by
+// REGISTER_BM_BRIDGE and calls it from the initializer of a static int, so
+// the converter is inserted when that static is initialized. The variable
+// is tagged UNUSED because only the call's side effect matters.
+#define USE_BM_BRIDGE(op_type)                                  \
+  extern int __reg_bm_bridge_##op_type##_Insert();              \
+  static int __reg_bm_bridge_##op_type##_Insert_return UNUSED = \
+      __reg_bm_bridge_##op_type##_Insert();
--- a/lite/kernels/bm/bridges/scale_op.cc
+++ b/lite/kernels/bm/bridges/scale_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge converter for the "scale" op type.
+// Placeholder for now: neither `op` nor `input_nodes` is read, and the
+// returned node map is always empty.
+node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> op,
+                             const node_map_type& input_nodes) {
+  // No BM nodes are produced yet, so hand back an empty map.
+  return node_map_type();
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Emits __reg_bm_bridge_scale_Insert(), which maps "scale" to
+// ScaleConverter in the bridge Factory when it is called.
+REGISTER_BM_BRIDGE(scale, paddle::lite::kernels::bm::bridges::ScaleConverter);
--- a/lite/kernels/bm/bridges/softmax_op.cc
+++ b/lite/kernels/bm/bridges/softmax_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+namespace bridges {
+
+// Bridge converter for the "softmax" op type.
+// Placeholder for now: neither `op` nor `input_nodes` is read, and the
+// returned node map is always empty.
+node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> op,
+                               const node_map_type& input_nodes) {
+  // No BM nodes are produced yet, so hand back an empty map.
+  return node_map_type();
+}
+
+}  // namespace bridges
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Emits __reg_bm_bridge_softmax_Insert(), which maps "softmax" to
+// SoftmaxConverter in the bridge Factory when it is called.
+REGISTER_BM_BRIDGE(softmax,
+                   paddle::lite::kernels::bm::bridges::SoftmaxConverter);
--- a/lite/kernels/bm/calib_compute.cc
+++ b/lite/kernels/bm/calib_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/calib_compute.h"
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void CalibComputeFp32ToInt8::Run() {
-}
-
-void CalibComputeInt8ToFp32::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(calib,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
-                     fp32_to_int8)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(calib,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
-                     int8_to_fp32)
-    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .Finalize();
-REGISTER_LITE_KERNEL(calib_once,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
-                     fp32_to_int8)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(calib_once,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
-                     int8_to_fp32)
-    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .Finalize();
--- a/lite/kernels/bm/conv_compute.cc
+++ b/lite/kernels/bm/conv_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/conv_compute.h"
-#include <vector>
-#include "lite/core/op_registry.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-template class ConvComputeInt8<PRECISION(kInt8)>;
-template class ConvComputeInt8<PRECISION(kFloat)>;
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-    conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kFloat),
-                                      DATALAYOUT(kNCHW))})
-    .BindInput("Bias",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindInput("Filter",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kFloat),
-                                      DATALAYOUT(kNCHW))})
-    .BindOutput("Output",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kFloat),
-                                       DATALAYOUT(kNCHW))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(
-    conv2d,
-    kBM,
-    kInt8,
-    kNCHW,
-    paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
-    int8_out)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kInt8),
-                                      DATALAYOUT(kNCHW))})
-    .BindInput("Bias",
-               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
-    .BindInput("Filter",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kInt8),
-                                      DATALAYOUT(kNCHW))})
-    .BindOutput("Output",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kFloat),
-                                       DATALAYOUT(kNCHW))})
-    .Finalize();
--- a/lite/kernels/bm/elementwise_compute.cc
+++ b/lite/kernels/bm/elementwise_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/elementwise_compute.h"
-#include <string>
-#include <vector>
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void ElementwiseAddCompute::Run() {
-}
-
-template <PrecisionType Ptype_out>
-void ElementwiseAddComputeInt8<Ptype_out>::Run() {
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(elementwise_add,
-                     kBM,
-                     kFloat,
-                     kNCHW,
-                     paddle::lite::kernels::bm::ElementwiseAddCompute,
-                     def)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(elementwise_add,
-                     kBM,
-                     kInt8,
-                     kNCHW,
-                     paddle::lite::kernels::bm::ElementwiseAddComputeInt8<PRECISION(kInt8)>,
-                     def)
-.BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-.BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-.Finalize();
--- a/lite/kernels/bm/batch_norm_compute.h
+++ b/lite/kernels/bm/batch_norm_compute.h
@@ -12,39 +12,37 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/batch_norm_op.h"
+#include "lite/kernels/bm/graph_compute.h"
+#include <sys/time.h>
+#include <time.h>
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {

-class BatchNormCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::BatchNormParam;
+// Currently a no-op: the body is empty.
+void GraphCompute::PrepareForRun() {}

-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~BatchNormCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class BatchNormComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::BatchNormParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~BatchNormComputeInt8() = default;
-};
-    
+// Currently a no-op: the body is empty.
+void GraphCompute::Run() {}

 }  // namespace bm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
+
+// Registers GraphCompute for `graph_op` with kBM / kFloat / kNCHW and the
+// trailing tag `def`. Every bound tensor ("Inputs", "Weight", "Outputs") is
+// declared with TARGET(kHost) rather than TARGET(kBM).
+REGISTER_LITE_KERNEL(graph_op,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::GraphCompute,
+                     def)
+    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
--- a/lite/kernels/bm/elementwise_compute.h
+++ b/lite/kernels/bm/elementwise_compute.h
@@ -13,29 +13,28 @@
 // limitations under the License.

 #pragma once
-#include <algorithm>
+
+#include <memory>
+#include <string>
+#include <vector>
 #include "lite/core/kernel.h"
 #include "lite/core/op_registry.h"
+#include "lite/core/types.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace bm {

-class ElementwiseAddCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+// Kernel class for target kBM / precision kFloat; its parameter type is
+// operators::GraphParam. PrepareForRun() and Run() are defined out of line.
+class GraphCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
-   void Run() override;
+  using param_t = operators::GraphParam;

-   virtual ~ElementwiseAddCompute() = default;
-};
+  void PrepareForRun() override;
+
+  void Run() override;

-template <PrecisionType Ptype_out>
-class ElementwiseAddComputeInt8
-    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    void Run() override;
-    virtual ~ElementwiseAddComputeInt8() = default;
+  virtual ~GraphCompute() = default;
 };

 }  // namespace bm

--- a/lite/kernels/bm/io_copy_compute.cc
+++ b/lite/kernels/bm/io_copy_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/backends/bm/target_wrapper.h"
-#include "lite/core/kernel.h"
-#include "lite/core/op_registry.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-using TargetW = TargetWrapper<TARGET(kBM)>;
-
-// Host to BM memory.
-void CopyFromHostSync(void* target, const void* source, size_t size) {
-  TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
-}
-
-void CopyFromHostAsync(void* target,
-                       const void* source,
-                       size_t size,
-                       TargetW::stream_t stream) {
-  TargetW::MemcpyAsync(target, source, size, IoDirection::HtoD, stream);
-}
-
-// Host to Host memory.
-void CopyToHostSync(void* target, const void* source, size_t size) {
-  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
-}
-
-/*
- * This kernel copies a tensor from host to BM space.
- */
-class IoCopyHostToBMCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
- public:
-  void Run() override {
-    auto& param = Param<operators::IoCopyParam>();
-    CHECK(param.x->target() == TARGET(kHost) ||
-          param.x->target() == TARGET(kX86));
-    auto mem_size = param.x->memory_size();
-    VLOG(4) << "copy size " << mem_size;
-    auto* data = param.y->mutable_data(TARGET(kBM), mem_size);
-    CopyFromHostSync(data, param.x->raw_data(), mem_size);
-  }
-
-  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
-    std::unique_ptr<type_infer_handler_t> res(new type_infer_handler_t);
-    *res = [](const std::map<std::string, const Type*>& inputs,
-              const std::string& out) -> const Type* {
-      CHECK(!inputs.empty());
-      auto* type = inputs.at("Input");
-      CHECK(type->target() == TARGET(kHost));
-
-      auto out_place = type->place();
-      out_place.target = TARGET(kBM);
-      auto* out_type = Type::Get(type->id(),
-                                 out_place.target,
-                                 out_place.precision,
-                                 out_place.layout,
-                                 out_place.device);
-      return out_type;
-    };
-    return res;
-  }
-
-  std::string doc() const override { return "Copy IO from HOST to BM"; }
-};
-
-/*
- * This kernel copies a tensor from BM to host space.
- */
-class IoCopyBMToHostCompute
-    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
- public:
-  void Run() override {
-    auto& param = Param<operators::IoCopyParam>();
-    CHECK(param.x->target() == TARGET(kBM));
-    auto mem_size = param.x->memory_size();
-    VLOG(4) << "io copy bm to host " << mem_size;
-    auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
-    CopyToHostSync(data, param.x->raw_data(), mem_size);
-  }
-
-  std::string doc() const override { return "Copy IO from BM to HOST"; }
-};
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(io_copy,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
-                     host_to_device)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kHost),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
-                     device_to_host)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kHost),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy_once,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
-                     host_to_device)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kHost),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
-
-REGISTER_LITE_KERNEL(io_copy_once,
-                     kBM,
-                     kAny,
-                     kAny,
-                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
-                     device_to_host)
-    .BindInput("Input",
-               {LiteType::GetTensorTy(TARGET(kBM),
-                                      PRECISION(kAny),
-                                      DATALAYOUT(kAny))})
-    .BindOutput("Out",
-                {LiteType::GetTensorTy(TARGET(kHost),
-                                       PRECISION(kAny),
-                                       DATALAYOUT(kAny))})
-    .Finalize();
--- a/lite/kernels/bm/mul_compute.cc
+++ b/lite/kernels/bm/mul_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/mul_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void MulCompute::PrepareForRun() {
-  return;
-}
-
-void MulCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void MulComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void MulComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  mul, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::MulCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  mul, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::MulComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/pool_compute.cc
+++ b/lite/kernels/bm/pool_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/pool_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void PoolCompute::PrepareForRun() {
-  return;
-}
-
-void PoolCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void PoolComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void PoolComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  pool2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::PoolCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  pool2d, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::PoolComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/pool_compute.h
+++ b/lite/kernels/bm/pool_compute.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/pool_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class PoolCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::PoolParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~PoolCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class PoolComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::PoolParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~PoolComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
--- a/lite/kernels/bm/relu_compute.h
+++ b/lite/kernels/bm/relu_compute.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/relu_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class ReluCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::ActivationParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~ReluCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ReluComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::ActivationParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~ReluComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
--- a/lite/kernels/bm/scale_compute.cc
+++ b/lite/kernels/bm/scale_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/scale_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void ScaleCompute::PrepareForRun() {
-  return;
-}
-
-void ScaleCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void ScaleComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void ScaleComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  scale, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ScaleCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  scale, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::ScaleComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/scale_compute.h
+++ b/lite/kernels/bm/scale_compute.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/scale_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class ScaleCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::ScaleParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~ScaleCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class ScaleComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::ScaleParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~ScaleComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
--- a/lite/kernels/bm/softmax_compute.cc
+++ b/lite/kernels/bm/softmax_compute.cc
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "lite/kernels/bm/softmax_compute.h"
-#include <string>
-#include <vector>
-#include "lite/core/op_registry.h"
-#include "lite/core/type_system.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-void SoftmaxCompute::PrepareForRun() {
-  return;
-}
-
-void SoftmaxCompute::Run() {
-  return;
-}
-
-template <PrecisionType Ptype_out>
-void SoftmaxComputeInt8<Ptype_out>::PrepareForRun() {
-  return;
-}
-
-template <PrecisionType Ptype_out> 
-void SoftmaxComputeInt8<Ptype_out>::Run() {
-  return;
-}
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle
-
-REGISTER_LITE_KERNEL(
-  softmax, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::SoftmaxCompute, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
-
-REGISTER_LITE_KERNEL(
-  softmax, kBM, kInt8, kNCHW, paddle::lite::kernels::bm::SoftmaxComputeInt8<PRECISION(kInt8)>, def)
-  .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
-  .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
-  .Finalize();
--- a/lite/kernels/bm/softmax_compute.h
+++ b/lite/kernels/bm/softmax_compute.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include <algorithm>
-#include "lite/core/kernel.h"
-#include "lite/operators/softmax_op.h"
-
-namespace paddle {
-namespace lite {
-namespace kernels {
-namespace bm {
-
-class SoftmaxCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
-  public:
-    using param_t = operators::SoftmaxParam;
-
-    void PrepareForRun() override;
-    void Run() override;
-
-    virtual ~SoftmaxCompute() = default;
-};
-
-template <PrecisionType Ptype_out>
-class SoftmaxComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
-  public:
-    using param_t = operators::SoftmaxParam;
-        
-    void PrepareForRun() override;
-    void Run() override;
-        
-    virtual ~SoftmaxComputeInt8() = default;
-};
-    
-
-}  // namespace bm
-}  // namespace kernels
-}  // namespace lite
-}  // namespace paddle