* all pass ok

* fix code_style test=develop

* all pass ok
* fix code_style test=develop
87271863 · cen.li · 38b7e29f · 87271863 · 87271863 · 87271863
26 changed files
--- a/cmake/bm.cmake
+++ b/cmake/bm.cmake
@@ -74,3 +74,6 @@ else()
  add_library(bmcpu SHARED IMPORTED GLOBAL)
  set_property(TARGET bmcpu PROPERTY IMPORTED_LOCATION ${BM_SDK_CPU_LIB})
 endif()
+
+set(bm_runtime_libs bmrt bmlib bmcompiler bmcpu CACHE INTERNAL "bm runtime libs")
+set(bm_builder_libs bmrt bmlib bmcompiler bmcpu CACHE INTERNAL "bm builder libs")
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -143,6 +143,10 @@ if (LITE_WITH_FPGA)
 add_definitions("-DLITE_WITH_FPGA")
 endif()

+if (LITE_WITH_BM)
+add_definitions("-DLITE_WITH_BM")
+endif()
+
 if (LITE_WITH_PROFILE)
    add_definitions("-DLITE_WITH_PROFILE")
    if (LITE_WITH_PRECISION_PROFILE)

--- a/lite/api/test_resnet50_lite_bm.cc
+++ b/lite/api/test_resnet50_lite_bm.cc
@@ -107,7 +107,7 @@ void TestModel(const std::vector<Place>& valid_places) {

 TEST(ResNet50, test_bm) {
  std::vector<Place> valid_places({
-      Place{TARGET(kBM), PRECISION(kInt8)}
+      Place{TARGET(kBM), PRECISION(kFloat)}
  });

  TestModel(valid_places);

--- a/lite/backends/bm/CMakeLists.txt
+++ b/lite/backends/bm/CMakeLists.txt
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
    return()
 endif()

-lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc)
+lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc DEPS ${bm_runtime_libs})
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -6,7 +6,8 @@ lite_cc_library(target_wrapper SRCS target_wrapper.cc
  X86_DEPS target_wrapper_x86
  CUDA_DEPS target_wrapper_cuda
  CL_DEPS cl_target_wrapper
-  FPGA_DEPS fpga_target_wrapper)
+  FPGA_DEPS fpga_target_wrapper
+  BM_DEPS target_wrapper_bm)

 lite_cc_library(memory SRCS memory.cc DEPS target_wrapper CL_DEPS cl_target_wrapper)


--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -97,9 +97,8 @@ class Context<TargetType::kBM> {
  explicit Context(const BMContext& ctx);
  // NOTE: InitOnce should only be used by ContextScheduler
  void InitOnce() {}
-  void CopySharedTo(NPUContext* ctx) {}
+  void CopySharedTo(BMContext* ctx) {}

-  BMContext& operator=(const BMContext& ctx) {}
  std::string name() const { return "BMContext"; }
  };
 #endif
@@ -340,6 +339,8 @@ class ContextScheduler {

  std::unique_ptr<KernelContext> NewContext(TargetType target) {
    std::unique_ptr<KernelContext> ctx(new KernelContext);
+
+    LOG(INFO) << "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa " << int(target) << " " << int(TARGET(kBM));
    switch (target) {
      case TARGET(kHost):
        kernel_contexts_[TargetType::kHost].As<HostContext>().CopySharedTo(
@@ -410,7 +411,6 @@ class ContextScheduler {
  void InitContext() {
    kernel_contexts_[Type].As<ContextT>().InitOnce();
  }
-
  ContextScheduler() {
    InitContext<TargetType::kHost, HostContext>();
 #ifdef LITE_WITH_X86

--- a/lite/core/optimizer.h
+++ b/lite/core/optimizer.h
@@ -57,8 +57,9 @@ class Optimizer {
    InitTargetTypeTransformPass();
    if (passes.empty()) {
      RunPasses(std::vector<std::string>{
-          {"lite_quant_dequant_fuse_pass",     //
-#if 0
+          {
+ #if 0
+          "lite_quant_dequant_fuse_pass",     //
           "lite_conv_elementwise_fuse_pass",  // conv-elemwise-bn
           "lite_conv_bn_fuse_pass",           //
           "lite_conv_elementwise_fuse_pass",  // conv-bn-elemwise
@@ -75,8 +76,9 @@ class Optimizer {
           "identity_scale_eliminate_pass",               //
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
           "lite_elementwise_add_activation_fuse_pass",  //
-#endif
-           "static_kernel_pick_pass",        // pick original kernel from graph
+#endif     
+#endif        
+           "static_kernel_pick_pass",        // pick original kernel from graph     
           "variable_place_inference_pass",  // inference arg/var's
           // info(target/precision/layout/device)
           // using kernel info
@@ -115,7 +117,6 @@ class Optimizer {
           "memory_optimize_pass",
 #endif
           "argument_type_display_pass"
-#endif
           }});
    } else {
      RunPasses(passes);

--- a/lite/kernels/bm/CMakeLists.txt
+++ b/lite/kernels/bm/CMakeLists.txt
@@ -2,8 +2,16 @@ if(NOT LITE_WITH_BM)
  return ()
 endif()

-add_kernel(conv_2d_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(conv_compute_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(calib_compute_bm BM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(pool_compute_bm BM basic SRCS pool_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(elementwise_compute_bm BM basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(relu_compute_bm BM basic SRCS relu_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(softmax_compute_bm BM basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(batch_norm_compute_bm BM basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(scale_compute_bm BM basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(mul_compute_bm BM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(io_copy_compute_bm BM basic SRCS io_copy_compute.cc DEPS ${lite_kernel_deps})

 message(STATUS "compile with lite BM kernels")

--- a/lite/kernels/bm/batch_norm_compute.cc
+++ b/lite/kernels/bm/batch_norm_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/batch_norm_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float batch_norm kernel for the BM target. Both hooks are empty
+// placeholders: nothing is prepared and nothing is computed.
+void BatchNormCompute::PrepareForRun() {}
+
+void BatchNormCompute::Run() {}
+
+// Int8 batch_norm kernel for the BM target. Both hooks are empty placeholders
+// for every Ptype_out.
+template <PrecisionType Ptype_out>
+void BatchNormComputeInt8<Ptype_out>::PrepareForRun() {}
+
+template <PrecisionType Ptype_out>
+void BatchNormComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float batch_norm kernel on BM: every input and output is bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(batch_norm,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::BatchNormCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 batch_norm kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(
+    batch_norm,
+    kBM,
+    kInt8,
+    kNCHW,
+    paddle::lite::kernels::bm::BatchNormComputeInt8<PRECISION(kInt8)>,
+    def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/batch_norm_compute.h
+++ b/lite/kernels/bm/batch_norm_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/batch_norm_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision batch_norm kernel declared for the BM target.
+class BatchNormCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::BatchNormParam;
+
+  virtual ~BatchNormCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision batch_norm kernel declared for the BM target. Ptype_out is
+// not referenced inside the class body.
+template <PrecisionType Ptype_out>
+class BatchNormComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::BatchNormParam;
+
+  virtual ~BatchNormComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/calib_compute.cc
+++ b/lite/kernels/bm/calib_compute.cc
@@ -23,7 +23,6 @@ namespace kernels {
 namespace bm {

 void CalibComputeFp32ToInt8::Run() {
-  return;
 }

 void CalibComputeInt8ToFp32::Run() {

--- a/lite/kernels/bm/conv_compute.cc
+++ b/lite/kernels/bm/conv_compute.cc
@@ -66,6 +66,6 @@ REGISTER_LITE_KERNEL(
                                      DATALAYOUT(kNCHW))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(TARGET(kBM),
-                                       PRECISION(kInt8),
+                                       PRECISION(kFloat),
                                       DATALAYOUT(kNCHW))})
    .Finalize();
--- a/lite/kernels/bm/elementwise_compute.cc
+++ b/lite/kernels/bm/elementwise_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/elementwise_compute.h"
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float elementwise_add kernel for the BM target; Run() is an empty
+// placeholder.
+void ElementwiseAddCompute::Run() {}
+
+// Int8 elementwise_add kernel for the BM target; Run() is an empty
+// placeholder for every Ptype_out.
+template <PrecisionType Ptype_out>
+void ElementwiseAddComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float elementwise_add kernel on BM: X, Y and Out are bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(elementwise_add,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::ElementwiseAddCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 elementwise_add kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(
+    elementwise_add,
+    kBM,
+    kInt8,
+    kNCHW,
+    paddle::lite::kernels::bm::ElementwiseAddComputeInt8<PRECISION(kInt8)>,
+    def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/elementwise_compute.h
+++ b/lite/kernels/bm/elementwise_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision elementwise_add kernel declared for the BM target.
+class ElementwiseAddCompute
+    : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  virtual ~ElementwiseAddCompute() = default;
+
+  void Run() override;
+};
+
+// Int8-precision elementwise_add kernel declared for the BM target.
+// Ptype_out is not referenced inside the class body.
+template <PrecisionType Ptype_out>
+class ElementwiseAddComputeInt8
+    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  virtual ~ElementwiseAddComputeInt8() = default;
+
+  void Run() override;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/io_copy_compute.cc
+++ b/lite/kernels/bm/io_copy_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/backends/bm/target_wrapper.h"
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+using TargetW = TargetWrapper<TARGET(kBM)>;
+
+// Host to BM memory: synchronously copies `size` bytes from `source` to
+// `target` through the BM target wrapper.
+void CopyFromHostSync(void* target, const void* source, size_t size) {
+  const auto direction = IoDirection::HtoD;
+  TargetW::MemcpySync(target, source, size, direction);
+}
+
+// Host to BM memory, asynchronous variant: forwards the copy request to the
+// BM target wrapper together with the caller-supplied stream.
+void CopyFromHostAsync(void* target,
+                       const void* source,
+                       size_t size,
+                       TargetW::stream_t stream) {
+  const auto direction = IoDirection::HtoD;
+  TargetW::MemcpyAsync(target, source, size, direction, stream);
+}
+
+// BM to Host memory: synchronously copies `size` bytes from `source` to
+// `target` using the device-to-host direction.
+void CopyToHostSync(void* target, const void* source, size_t size) {
+  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
+}
+
+/*
+ * Kernel that copies a tensor from host memory into BM memory.
+ */
+class IoCopyHostToBMCompute
+    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
+ public:
+  // Allocates param.y on the BM target with the byte size of param.x, then
+  // synchronously copies the raw bytes of param.x into it.
+  void Run() override {
+    auto& param = Param<operators::IoCopyParam>();
+    // The source tensor must be on kHost or kX86.
+    CHECK(param.x->target() == TARGET(kHost) ||
+          param.x->target() == TARGET(kX86));
+    const auto num_bytes = param.x->memory_size();
+    VLOG(4) << "copy size " << num_bytes;
+    auto* device_dst = param.y->mutable_data(TARGET(kBM), num_bytes);
+    CopyFromHostSync(device_dst, param.x->raw_data(), num_bytes);
+  }
+
+  // Returns a handler that derives the output type from the "Input" type by
+  // replacing its target with kBM while keeping id, precision, layout and
+  // device.
+  // NOTE(review): Run() also accepts kX86 sources, while this handler only
+  // accepts kHost -- confirm that the difference is intended.
+  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
+    auto infer_out_type = [](const std::map<std::string, const Type*>& inputs,
+                             const std::string& out) -> const Type* {
+      CHECK(!inputs.empty());
+      auto* type = inputs.at("Input");
+      CHECK(type->target() == TARGET(kHost));
+
+      const auto in_place = type->place();
+      return Type::Get(type->id(),
+                       TARGET(kBM),
+                       in_place.precision,
+                       in_place.layout,
+                       in_place.device);
+    };
+    std::unique_ptr<type_infer_handler_t> handler(new type_infer_handler_t);
+    *handler = infer_out_type;
+    return handler;
+  }
+
+  std::string doc() const override { return "Copy IO from HOST to BM"; }
+};
+
+/*
+ * Kernel that copies a tensor from BM memory back into host memory.
+ */
+class IoCopyBMToHostCompute
+    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
+ public:
+  // Allocates param.y on the host with the byte size of param.x, then
+  // synchronously copies the raw bytes of param.x into it.
+  void Run() override {
+    auto& param = Param<operators::IoCopyParam>();
+    // The source tensor must already be on the BM target.
+    CHECK(param.x->target() == TARGET(kBM));
+    const auto num_bytes = param.x->memory_size();
+    VLOG(4) << "io copy bm to host " << num_bytes;
+    auto* host_dst = param.y->mutable_data(TARGET(kHost), num_bytes);
+    CopyToHostSync(host_dst, param.x->raw_data(), num_bytes);
+  }
+
+  std::string doc() const override { return "Copy IO from BM to HOST"; }
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// io_copy, host -> BM: input is a kHost tensor, output is a kBM tensor.
+REGISTER_LITE_KERNEL(io_copy,
+                     kBM,
+                     kAny,
+                     kAny,
+                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
+                     host_to_device)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(
+                   TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(
+                    TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
+    .Finalize();
+
+// io_copy, BM -> host: input is a kBM tensor, output is a kHost tensor.
+REGISTER_LITE_KERNEL(io_copy,
+                     kBM,
+                     kAny,
+                     kAny,
+                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
+                     device_to_host)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(
+                   TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(
+                    TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
+    .Finalize();
+
+// io_copy_once, host -> BM: same kernel class and bindings as io_copy.
+REGISTER_LITE_KERNEL(io_copy_once,
+                     kBM,
+                     kAny,
+                     kAny,
+                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
+                     host_to_device)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(
+                   TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(
+                    TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
+    .Finalize();
+
+// io_copy_once, BM -> host: same kernel class and bindings as io_copy.
+REGISTER_LITE_KERNEL(io_copy_once,
+                     kBM,
+                     kAny,
+                     kAny,
+                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
+                     device_to_host)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(
+                   TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(
+                    TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
+    .Finalize();
--- a/lite/kernels/bm/mul_compute.cc
+++ b/lite/kernels/bm/mul_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/mul_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float mul kernel for the BM target. Both hooks are empty placeholders:
+// nothing is prepared and nothing is computed.
+void MulCompute::PrepareForRun() {}
+
+void MulCompute::Run() {}
+
+// Int8 mul kernel for the BM target. Both hooks are empty placeholders for
+// every Ptype_out.
+template <PrecisionType Ptype_out>
+void MulComputeInt8<Ptype_out>::PrepareForRun() {}
+
+template <PrecisionType Ptype_out>
+void MulComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float mul kernel on BM: X, Y and Out are bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(mul,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::MulCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 mul kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(mul,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::MulComputeInt8<PRECISION(kInt8)>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/mul_compute.h
+++ b/lite/kernels/bm/mul_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/mul_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision mul kernel declared for the BM target.
+class MulCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::MulParam;
+
+  virtual ~MulCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision mul kernel declared for the BM target. Ptype_out is not
+// referenced inside the class body.
+template <PrecisionType Ptype_out>
+class MulComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::MulParam;
+
+  virtual ~MulComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/pool_compute.cc
+++ b/lite/kernels/bm/pool_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/pool_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float pool kernel for the BM target. Both hooks are empty placeholders:
+// nothing is prepared and nothing is computed.
+void PoolCompute::PrepareForRun() {}
+
+void PoolCompute::Run() {}
+
+// Int8 pool kernel for the BM target. Both hooks are empty placeholders for
+// every Ptype_out.
+template <PrecisionType Ptype_out>
+void PoolComputeInt8<Ptype_out>::PrepareForRun() {}
+
+template <PrecisionType Ptype_out>
+void PoolComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float pool2d kernel on BM: X and Out are bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(pool2d,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::PoolCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 pool2d kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(pool2d,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::PoolComputeInt8<PRECISION(kInt8)>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/pool_compute.h
+++ b/lite/kernels/bm/pool_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/pool_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision pool kernel declared for the BM target.
+class PoolCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::PoolParam;
+
+  virtual ~PoolCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision pool kernel declared for the BM target. Ptype_out is not
+// referenced inside the class body.
+template <PrecisionType Ptype_out>
+class PoolComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::PoolParam;
+
+  virtual ~PoolComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/relu_compute.cc
+++ b/lite/kernels/bm/relu_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/relu_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float relu kernel for the BM target. Both hooks are empty placeholders:
+// nothing is prepared and nothing is computed.
+void ReluCompute::PrepareForRun() {}
+
+void ReluCompute::Run() {}
+
+// Int8 relu kernel for the BM target. Both hooks are empty placeholders for
+// every Ptype_out.
+template <PrecisionType Ptype_out>
+void ReluComputeInt8<Ptype_out>::PrepareForRun() {}
+
+template <PrecisionType Ptype_out>
+void ReluComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float relu kernel on BM: X and Out are bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(relu,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::ReluCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 relu kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(relu,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::ReluComputeInt8<PRECISION(kInt8)>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/relu_compute.h
+++ b/lite/kernels/bm/relu_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/relu_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision relu kernel declared for the BM target.
+class ReluCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationParam;
+
+  virtual ~ReluCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision relu kernel declared for the BM target. Ptype_out is not
+// referenced inside the class body.
+template <PrecisionType Ptype_out>
+class ReluComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::ActivationParam;
+
+  virtual ~ReluComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/scale_compute.cc
+++ b/lite/kernels/bm/scale_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/scale_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float scale kernel for the BM target. Both hooks are empty placeholders:
+// nothing is prepared and nothing is computed.
+void ScaleCompute::PrepareForRun() {}
+
+void ScaleCompute::Run() {}
+
+// Int8 scale kernel for the BM target. Both hooks are empty placeholders for
+// every Ptype_out.
+template <PrecisionType Ptype_out>
+void ScaleComputeInt8<Ptype_out>::PrepareForRun() {}
+
+template <PrecisionType Ptype_out>
+void ScaleComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Float scale kernel on BM: X and Out are bound to
+// LiteType::GetTensorTy(TARGET(kBM)).
+REGISTER_LITE_KERNEL(scale,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::ScaleCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Int8 scale kernel on BM with the same argument bindings.
+REGISTER_LITE_KERNEL(scale,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::ScaleComputeInt8<PRECISION(kInt8)>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/scale_compute.h
+++ b/lite/kernels/bm/scale_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/scale_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision `scale` kernel for the BM target.
+// Uses operators::ScaleParam as its parameter type and overrides the two
+// kernel hooks PrepareForRun() and Run(), which are defined out of line.
+class ScaleCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ScaleParam;
+
+  virtual ~ScaleCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision `scale` kernel for the BM target.
+// Ptype_out is a template parameter that is not referenced inside the class
+// body; NOTE(review): presumably it selects the output precision -- confirm.
+// Uses operators::ScaleParam as its parameter type and overrides the two
+// kernel hooks PrepareForRun() and Run(), which are defined out of line.
+template <PrecisionType Ptype_out>
+class ScaleComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::ScaleParam;
+
+  virtual ~ScaleComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+    
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/kernels/bm/softmax_compute.cc
+++ b/lite/kernels/bm/softmax_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/softmax_compute.h"
+#include <string>
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Preparation hook of the float `softmax` kernel for the BM target.
+// The body is empty: no setup work is performed.
+void SoftmaxCompute::PrepareForRun() {}
+
+// Execution hook of the float `softmax` kernel for the BM target.
+// NOTE(review): the body is empty, so no softmax is computed here --
+// presumably a placeholder; confirm where the BM computation happens.
+void SoftmaxCompute::Run() {}
+
+// Preparation hook of the int8 `softmax` kernel template for the BM target.
+// The body is empty for every Ptype_out: no setup work is performed.
+template <PrecisionType Ptype_out>
+void SoftmaxComputeInt8<Ptype_out>::PrepareForRun() {}
+
+// Execution hook of the int8 `softmax` kernel template for the BM target.
+// NOTE(review): the body is empty and Ptype_out is not used, so no softmax
+// is computed here -- presumably a placeholder; confirm.
+template <PrecisionType Ptype_out>
+void SoftmaxComputeInt8<Ptype_out>::Run() {}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Registers SoftmaxCompute as the `softmax` kernel for target kBM, precision
+// kFloat, layout kNCHW, under the alias `def`. Input "X" and output "Out"
+// are both declared as tensors on the kBM target.
+REGISTER_LITE_KERNEL(softmax,
+                     kBM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::bm::SoftmaxCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
+
+// Registers SoftmaxComputeInt8<PRECISION(kInt8)> as the `softmax` kernel for
+// target kBM, precision kInt8, layout kNCHW, under the alias `def`.
+// NOTE(review): "X" and "Out" are bound with GetTensorTy(TARGET(kBM)) and no
+// explicit precision argument, exactly like the kFloat registration --
+// confirm this is the intended tensor type for the int8 kernel.
+REGISTER_LITE_KERNEL(
+    softmax,
+    kBM,
+    kInt8,
+    kNCHW,
+    paddle::lite::kernels::bm::SoftmaxComputeInt8<PRECISION(kInt8)>,
+    def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
+    .Finalize();
--- a/lite/kernels/bm/softmax_compute.h
+++ b/lite/kernels/bm/softmax_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include "lite/core/kernel.h"
+#include "lite/operators/softmax_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+// Float-precision `softmax` kernel for the BM target.
+// Uses operators::SoftmaxParam as its parameter type and overrides the two
+// kernel hooks PrepareForRun() and Run(), which are defined out of line.
+class SoftmaxCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::SoftmaxParam;
+
+  virtual ~SoftmaxCompute() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+
+// Int8-precision `softmax` kernel for the BM target.
+// Ptype_out is a template parameter that is not referenced inside the class
+// body; NOTE(review): presumably it selects the output precision -- confirm.
+// Uses operators::SoftmaxParam as its parameter type and overrides the two
+// kernel hooks PrepareForRun() and Run(), which are defined out of line.
+template <PrecisionType Ptype_out>
+class SoftmaxComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::SoftmaxParam;
+
+  virtual ~SoftmaxComputeInt8() = default;
+
+  void PrepareForRun() override;
+  void Run() override;
+};
+    
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/lite/operators/conv_op.h
+++ b/lite/operators/conv_op.h
@@ -112,7 +112,9 @@ class ConvOpLite : public OpLite {
    return true;
  }

-  void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
+  void AttachKernel(KernelBase* kernel) override {
+    kernel->SetParam(param_);
+  }

  std::string DebugString() const override { return "conv2d"; }