Unverified · Commit 91de3b45 authored by J jackzhang235, committed by GitHub

Merge branch 'develop' into add_dropout

@@ -316,11 +316,9 @@ void Predictor::Build(const cpp::ProgramDesc &desc,
       }
     }
   }
-#ifndef LITE_WITH_MLU
   if (is_quantized_model) {
     inner_places.emplace_back(Place{TARGET(kARM), PRECISION(kInt8)});
   }
-#endif
   Program program(desc, scope_, inner_places);
...
@@ -60,8 +60,19 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
     CHECK(0) << "Unsupport cast type";
   }
   cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
+  auto v_places = graph->valid_places();
+  for (auto it = v_places.begin(); it != v_places.end();) {
+    if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
+        it->target != TARGET(kX86)) {
+      it = v_places.erase(it);
+    } else {
+      ++it;
+    }
+  }
   // create kernels
-  auto kernels = cast_op->CreateKernels(graph->valid_places());
+  auto kernels = cast_op->CreateKernels(v_places);
   std::vector<std::unique_ptr<KernelBase>> selected_kernels;
   bool is_found = false;
   for (auto& kernel : kernels) {
@@ -150,8 +161,18 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
   cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
+  auto v_places = graph->valid_places();
+  for (auto it = v_places.begin(); it != v_places.end();) {
+    if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
+        it->target != TARGET(kX86)) {
+      it = v_places.erase(it);
+    } else {
+      ++it;
+    }
+  }
   // create kernels
-  auto kernels = cast_op->CreateKernels(graph->valid_places());
+  auto kernels = cast_op->CreateKernels(v_places);
   std::vector<std::unique_ptr<KernelBase>> selected_kernels;
   bool is_found = false;
   for (auto& kernel : kernels) {
...
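Both InsertCastBefore and InsertCastAfter now trim graph->valid_places() down to the kMLU, kHost and kX86 targets before handing the list to CreateKernels, so the inserted cast ops can only pick kernels on those targets. Below is a minimal sketch of the same filtering written with the erase-remove idiom; it is illustrative only, not part of this patch, and reuses the Place, TARGET(...), graph and cast_op identifiers from the hunks above, assuming <algorithm> is included.

// Sketch only: equivalent to the explicit erase loop added in the patch.
auto v_places = graph->valid_places();
v_places.erase(std::remove_if(v_places.begin(),
                              v_places.end(),
                              [](const Place& place) {
                                return place.target != TARGET(kMLU) &&
                                       place.target != TARGET(kHost) &&
                                       place.target != TARGET(kX86);
                              }),
               v_places.end());
auto kernels = cast_op->CreateKernels(v_places);  // only MLU/Host/X86 kernels are created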
@@ -369,6 +369,7 @@ void MulticlassNmsCompute::Run() {
     }
   } else {
     outs->Resize({static_cast<int64_t>(num_kept), out_dim});
+    (void)outs->mutable_data<float>();
     int offset = 0;
     int* oindices = nullptr;
     for (int i = 0; i < n; ++i) {
...
@@ -3,7 +3,7 @@ if(NOT LITE_WITH_MLU)
 endif()
 lite_cc_library(subgraph_bridge_utility_mlu SRCS utility.cc DEPS ${mlu_builder_libs} tensor)
-lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs})
+lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs} subgraph_bridge_utility_mlu)
 lite_cc_library(subgraph_bridge_graph_mlu SRCS graph.cc DEPS subgraph_bridge_utility_mlu subgraph_bridge_tensor_mlu)
 set(mlu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_mlu subgraph_bridge_graph_mlu)
@@ -49,6 +49,6 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer targe
 lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
-lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
+#lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
@@ -60,6 +60,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                         output_tensor->mlu_tensor()));
   }
   graph->FuseOp(activation_op);
+  CNML_CALL(cnmlDestroyBaseOp(&activation_op));
   return SUCCESS;
 }
...
@@ -81,6 +81,8 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   graph->BindConstData(mean_var_name, mean);
   graph->FuseOp(bn_op);
+  CNML_CALL(cnmlDestroyBaseOp(&bn_op));
   return SUCCESS;
 }
...
@@ -60,6 +60,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                  &outputs,
                                  1));
   graph->FuseOp(concat_op);
+  CNML_CALL(cnmlDestroyBaseOp(&concat_op));
   return SUCCESS;
 }
...
@@ -278,6 +278,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }
   graph->BindConstData(filter_var_name, filter);
   graph->FuseOp(conv_op);
+  CNML_CALL(cnmlDestroyBaseOp(&conv_op));
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -117,6 +117,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }
   graph->FuseOp(elementwise_op);
+  CNML_CALL(cnmlDestroyBaseOp(&elementwise_op));
   cnmlBaseOp_t act_op;
   if (op_type == "fusion_elementwise_add_activation") {
     auto mid_tensor = graph->GetNode(out_var_name + "_mid");
@@ -127,6 +128,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                               mid_tensor->mlu_tensor(),
                               output_tensor->mlu_tensor()));
     graph->FuseOp(act_op);
+    CNML_CALL(cnmlDestroyBaseOp(&act_op));
   }
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -160,6 +160,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       1 / *min_element(weight_scale.begin(), weight_scale.end()));
   graph->FuseOp(fc_op);
+  CNML_CALL(cnmlDestroyBaseOp(&fc_op));
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -49,9 +49,6 @@ class Graph {
   ~Graph() {
     FreeConstData();
     CNML_CALL(cnmlDestroyFusionOp(&fusion_op_));
-    for (auto op : ops_) {
-      CNML_CALL(cnmlDestroyBaseOp(&op));
-    }
 #if PRINT_HW_TIME
     CNRT_CALL(cnrtDestroyNotifier(&notifier_start_));
     CNRT_CALL(cnrtDestroyNotifier(&notifier_end_));
@@ -234,7 +231,6 @@ class Graph {
   std::vector<void*> output_addrs_;
   std::vector<std::shared_ptr<MLUTensor>> input_tensors_;
   std::vector<std::shared_ptr<MLUTensor>> output_tensors_;
-  std::vector<cnmlBaseOp_t> ops_;
   cnmlFusionOp_t fusion_op_;
   std::vector<void*> const_data_storage_;
 #if PRINT_HW_TIME
...
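Together with the converter hunks above and below, this graph.h change moves base-op cleanup out of Graph: the ops_ vector and the destructor loop that destroyed every stored op are gone, and each converter now destroys its local handle immediately after FuseOp. A minimal sketch of that per-converter lifetime follows; CreateTheOp is a hypothetical stand-in for whichever cnmlCreate*Op call a given converter already makes, and everything else a real converter does is omitted.

// Sketch of the base-op lifetime the converters now follow (illustrative only).
cnmlBaseOp_t base_op{nullptr};
CNML_CALL(CreateTheOp(&base_op));         // hypothetical create call, as in each converter
graph->FuseOp(base_op);                   // the fusion op records what it needs from base_op
CNML_CALL(cnmlDestroyBaseOp(&base_op));   // release the local handle right away
return SUCCESS;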
@@ -85,6 +85,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                              nn_param));
   CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param));
   graph->FuseOp(interp_op);
+  CNML_CALL(cnmlDestroyBaseOp(&interp_op));
   return SUCCESS;
 }
...
@@ -121,6 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                 output_tensor->mlu_tensor()));
   CNML_CALL(cnmlDestroyPoolOpParam(&pool_param));
   graph->FuseOp(pool_op);
+  CNML_CALL(cnmlDestroyBaseOp(&pool_op));
   return SUCCESS;
 }
...
@@ -61,6 +61,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                  alpha_tensor->mlu_tensor(),
                                  beta_tensor->mlu_tensor()));
   graph->FuseOp(scale_op);
+  CNML_CALL(cnmlDestroyBaseOp(&scale_op));
   return SUCCESS;
 }
...
@@ -55,6 +55,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                    graph->GetNode(x_var_name)->mlu_tensor(),
                                    output_tensor->mlu_tensor()));
   graph->FuseOp(softmax_op);
+  CNML_CALL(cnmlDestroyBaseOp(&softmax_op));
   return SUCCESS;
 }
...
@@ -89,8 +89,9 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
   }
   graph.Compile(CNML_MLU270, 1);
   graph.Compute(forward_param, queue_);
+  CNRT_CALL(cnrtSyncQueue(queue_));
   for (auto& output_name : output_var_names) {
     auto output_tensor = scope->FindMutableTensor(output_name);
     Tensor temp_out;
...
@@ -61,7 +61,7 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(graph->HasNode(x_var_name));
   auto input_tensor = graph->GetNode(x_var_name);
-  cnmlBaseOp_t transpose_op_{nullptr};
+  cnmlBaseOp_t transpose_op{nullptr};
   cnmlNdTransposeOpParam_t transpose_param{nullptr};
@@ -69,12 +69,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       &transpose_param, axis_nhwc.data(), axis_nhwc.size()));
   // Use cnmlCreatexxxOpForward to create op.
-  CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op_,
+  CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op,
                                        input_tensor->mlu_tensor(),
                                        output_tensor->mlu_tensor(),
                                        transpose_param));
-  graph->FuseOp(transpose_op_);
+  graph->FuseOp(transpose_op);
+  CNML_CALL(cnmlDestroyBaseOp(&transpose_op));
   return SUCCESS;
 }
...