diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc index 43b7c51aa9b282a0722335b61a4337004a99d66f..556a9e0af01854ff5c57a14dade72b81ed255964 100644 --- a/lite/api/cxx_api.cc +++ b/lite/api/cxx_api.cc @@ -316,11 +316,9 @@ void Predictor::Build(const cpp::ProgramDesc &desc, } } } -#ifndef LITE_WITH_MLU if (is_quantized_model) { inner_places.emplace_back(Place{TARGET(kARM), PRECISION(kInt8)}); } -#endif Program program(desc, scope_, inner_places); diff --git a/lite/core/mir/mlu_postprocess_pass.cc b/lite/core/mir/mlu_postprocess_pass.cc index c69584b2961c9a63b565536d33e36d8278f2c8ad..191f1543f3d8097ea9103a2df737c1b1ad7f7721 100644 --- a/lite/core/mir/mlu_postprocess_pass.cc +++ b/lite/core/mir/mlu_postprocess_pass.cc @@ -60,8 +60,19 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type, CHECK(0) << "Unsupport cast type"; } cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope()); + + auto v_places = graph->valid_places(); + for (auto it = v_places.begin(); it != v_places.end();) { + if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) && + it->target != TARGET(kX86)) { + it = v_places.erase(it); + } else { + ++it; + } + } + // create kernels - auto kernels = cast_op->CreateKernels(graph->valid_places()); + auto kernels = cast_op->CreateKernels(v_places); std::vector> selected_kernels; bool is_found = false; for (auto& kernel : kernels) { @@ -150,8 +161,18 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type, cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope()); + auto v_places = graph->valid_places(); + for (auto it = v_places.begin(); it != v_places.end();) { + if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) && + it->target != TARGET(kX86)) { + it = v_places.erase(it); + } else { + ++it; + } + } + // create kernels - auto kernels = cast_op->CreateKernels(graph->valid_places()); + auto kernels = cast_op->CreateKernels(v_places); std::vector> selected_kernels; bool is_found = false; for (auto& kernel : kernels) { diff --git a/lite/kernels/host/multiclass_nms_compute.cc b/lite/kernels/host/multiclass_nms_compute.cc index 9f4c2fb6f53c17f97cb9a1aaeecd493713899cab..17c8cfd9fe0e40c59441b40d29f7803d5e8aa3fe 100644 --- a/lite/kernels/host/multiclass_nms_compute.cc +++ b/lite/kernels/host/multiclass_nms_compute.cc @@ -369,6 +369,7 @@ void MulticlassNmsCompute::Run() { } } else { outs->Resize({static_cast(num_kept), out_dim}); + (void)outs->mutable_data(); int offset = 0; int* oindices = nullptr; for (int i = 0; i < n; ++i) { diff --git a/lite/kernels/mlu/bridges/CMakeLists.txt b/lite/kernels/mlu/bridges/CMakeLists.txt index ceaac8ac32670fce5d8699aede773f7e0aafc5cd..2047ca6b7aed2825398ce4ce0822dd3b9ef7e93a 100644 --- a/lite/kernels/mlu/bridges/CMakeLists.txt +++ b/lite/kernels/mlu/bridges/CMakeLists.txt @@ -3,7 +3,7 @@ if(NOT LITE_WITH_MLU) endif() lite_cc_library(subgraph_bridge_utility_mlu SRCS utility.cc DEPS ${mlu_builder_libs} tensor) -lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs}) +lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs} subgraph_bridge_utility_mlu) lite_cc_library(subgraph_bridge_graph_mlu SRCS graph.cc DEPS subgraph_bridge_utility_mlu subgraph_bridge_tensor_mlu) set(mlu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_mlu subgraph_bridge_graph_mlu) @@ -49,6 +49,6 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer targe lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) -lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) +#lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}") diff --git a/lite/kernels/mlu/bridges/act_op.cc b/lite/kernels/mlu/bridges/act_op.cc index 286195d9d5f961288dd0156db31ff8aacae58227..039d4c26ec08cd9cefa1ca66c25ec9dd94109676 100644 --- a/lite/kernels/mlu/bridges/act_op.cc +++ b/lite/kernels/mlu/bridges/act_op.cc @@ -60,6 +60,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) { output_tensor->mlu_tensor())); } graph->FuseOp(activation_op); + CNML_CALL(cnmlDestroyBaseOp(&activation_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/batch_norm_op.cc b/lite/kernels/mlu/bridges/batch_norm_op.cc index 7353a685dd5fd3a5bcc8c88def8ffb8b96fdde55..61f098ec8b5b867fc9971334336c65f06b5862bb 100644 --- a/lite/kernels/mlu/bridges/batch_norm_op.cc +++ b/lite/kernels/mlu/bridges/batch_norm_op.cc @@ -81,6 +81,8 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { graph->BindConstData(mean_var_name, mean); graph->FuseOp(bn_op); + CNML_CALL(cnmlDestroyBaseOp(&bn_op)); + return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/concat_op.cc b/lite/kernels/mlu/bridges/concat_op.cc index 14f0da746a00c1ea10ffae824217dbb2df84df55..1c3c0b1e35b26950ef07f7a4d63d84e0df06c4c5 100644 --- a/lite/kernels/mlu/bridges/concat_op.cc +++ b/lite/kernels/mlu/bridges/concat_op.cc @@ -60,6 +60,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) { &outputs, 1)); graph->FuseOp(concat_op); + CNML_CALL(cnmlDestroyBaseOp(&concat_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/conv_op.cc b/lite/kernels/mlu/bridges/conv_op.cc index b32452180da78621540b671b92e2ccd27b86c075..5e88323b1efc2427c7e143dca53b21404e33742f 100644 --- a/lite/kernels/mlu/bridges/conv_op.cc +++ b/lite/kernels/mlu/bridges/conv_op.cc @@ -278,6 +278,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { } graph->BindConstData(filter_var_name, filter); graph->FuseOp(conv_op); + CNML_CALL(cnmlDestroyBaseOp(&conv_op)); return REBUILD_WHEN_SHAPE_CHANGED; } diff --git a/lite/kernels/mlu/bridges/elementwise_ops.cc b/lite/kernels/mlu/bridges/elementwise_ops.cc index f58b68290c4e1a940a859aef4af0d11845a979bd..5f7192a0628a7887dbca15d63f1ba22799d7ee4b 100644 --- a/lite/kernels/mlu/bridges/elementwise_ops.cc +++ b/lite/kernels/mlu/bridges/elementwise_ops.cc @@ -117,6 +117,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { } graph->FuseOp(elementwise_op); + CNML_CALL(cnmlDestroyBaseOp(&elementwise_op)); cnmlBaseOp_t act_op; if (op_type == "fusion_elementwise_add_activation") { auto mid_tensor = graph->GetNode(out_var_name + "_mid"); @@ -127,6 +128,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { mid_tensor->mlu_tensor(), output_tensor->mlu_tensor())); graph->FuseOp(act_op); + CNML_CALL(cnmlDestroyBaseOp(&act_op)); } return REBUILD_WHEN_SHAPE_CHANGED; } diff --git a/lite/kernels/mlu/bridges/fc_op.cc b/lite/kernels/mlu/bridges/fc_op.cc index b319374935e576172097564d936d987d7864bb47..bb0af27d4d59602dd587167ed8f0c8c43dcfb86f 100644 --- a/lite/kernels/mlu/bridges/fc_op.cc +++ b/lite/kernels/mlu/bridges/fc_op.cc @@ -160,6 +160,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) { 1 / *min_element(weight_scale.begin(), weight_scale.end())); graph->FuseOp(fc_op); + CNML_CALL(cnmlDestroyBaseOp(&fc_op)); return REBUILD_WHEN_SHAPE_CHANGED; } diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h index c5bc236dbfdab4db89aa0fba68fb6c9702fcfbcd..0583a0c9533b531c824b093b22900411fda38c01 100644 --- a/lite/kernels/mlu/bridges/graph.h +++ b/lite/kernels/mlu/bridges/graph.h @@ -49,9 +49,6 @@ class Graph { ~Graph() { FreeConstData(); CNML_CALL(cnmlDestroyFusionOp(&fusion_op_)); - for (auto op : ops_) { - CNML_CALL(cnmlDestroyBaseOp(&op)); - } #if PRINT_HW_TIME CNRT_CALL(cnrtDestroyNotifier(¬ifier_start_)); CNRT_CALL(cnrtDestroyNotifier(¬ifier_end_)); @@ -234,7 +231,6 @@ class Graph { std::vector output_addrs_; std::vector> input_tensors_; std::vector> output_tensors_; - std::vector ops_; cnmlFusionOp_t fusion_op_; std::vector const_data_storage_; #if PRINT_HW_TIME diff --git a/lite/kernels/mlu/bridges/interpolate_op.cc b/lite/kernels/mlu/bridges/interpolate_op.cc index e201199824d8042abd6002ccbe5bb659a9ca2898..16fbb33be7698e72244eae92a82c59a40c83555b 100644 --- a/lite/kernels/mlu/bridges/interpolate_op.cc +++ b/lite/kernels/mlu/bridges/interpolate_op.cc @@ -85,6 +85,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { nn_param)); CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param)); graph->FuseOp(interp_op); + CNML_CALL(cnmlDestroyBaseOp(&interp_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/pool_op.cc b/lite/kernels/mlu/bridges/pool_op.cc index d9c84808dcf6eeed3fe1eee6fdf9e84d8aeee4fc..070b99c2fdec8ae2b25302be303bc9f106a3d355 100644 --- a/lite/kernels/mlu/bridges/pool_op.cc +++ b/lite/kernels/mlu/bridges/pool_op.cc @@ -121,6 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { output_tensor->mlu_tensor())); CNML_CALL(cnmlDestroyPoolOpParam(&pool_param)); graph->FuseOp(pool_op); + CNML_CALL(cnmlDestroyBaseOp(&pool_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/scale_op.cc b/lite/kernels/mlu/bridges/scale_op.cc index 5557602bd7576ccd71c51f52a538a45fe27f7ada..5b6b3dff7969562b19344f9eccbf219d26c3e02d 100644 --- a/lite/kernels/mlu/bridges/scale_op.cc +++ b/lite/kernels/mlu/bridges/scale_op.cc @@ -61,6 +61,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { alpha_tensor->mlu_tensor(), beta_tensor->mlu_tensor())); graph->FuseOp(scale_op); + CNML_CALL(cnmlDestroyBaseOp(&scale_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/softmax_op.cc b/lite/kernels/mlu/bridges/softmax_op.cc index 17c911675718a15c7ede4888b268ffcd62b4d8ed..66e106658e0e167f00130b3e6ed13ac1ea7191bb 100644 --- a/lite/kernels/mlu/bridges/softmax_op.cc +++ b/lite/kernels/mlu/bridges/softmax_op.cc @@ -55,6 +55,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) { graph->GetNode(x_var_name)->mlu_tensor(), output_tensor->mlu_tensor())); graph->FuseOp(softmax_op); + CNML_CALL(cnmlDestroyBaseOp(&softmax_op)); return SUCCESS; } diff --git a/lite/kernels/mlu/bridges/test_helper.cc b/lite/kernels/mlu/bridges/test_helper.cc index 377a00689ef3a27f78ae008072578ab3701cd337..7dca67fc30845f23c2c1334697094bea982ef897 100644 --- a/lite/kernels/mlu/bridges/test_helper.cc +++ b/lite/kernels/mlu/bridges/test_helper.cc @@ -89,8 +89,9 @@ void LaunchOp(const std::shared_ptr op, } graph.Compile(CNML_MLU270, 1); - graph.Compute(forward_param, queue_); + CNRT_CALL(cnrtSyncQueue(queue_)); + for (auto& output_name : output_var_names) { auto output_tensor = scope->FindMutableTensor(output_name); Tensor temp_out; diff --git a/lite/kernels/mlu/bridges/transpose_op.cc b/lite/kernels/mlu/bridges/transpose_op.cc index 5e5c5b79ebff4e4ae06e99e4a18f22ebabd4ceb5..130e8417f1f9ad5ea4bc8c0b4ffaacf20124fae7 100644 --- a/lite/kernels/mlu/bridges/transpose_op.cc +++ b/lite/kernels/mlu/bridges/transpose_op.cc @@ -61,7 +61,7 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(graph->HasNode(x_var_name)); auto input_tensor = graph->GetNode(x_var_name); - cnmlBaseOp_t transpose_op_{nullptr}; + cnmlBaseOp_t transpose_op{nullptr}; cnmlNdTransposeOpParam_t transpose_param{nullptr}; @@ -69,12 +69,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { &transpose_param, axis_nhwc.data(), axis_nhwc.size())); // Use cnmlCreatexxxOpForward to create op. - CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op_, + CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op, input_tensor->mlu_tensor(), output_tensor->mlu_tensor(), transpose_param)); - graph->FuseOp(transpose_op_); + graph->FuseOp(transpose_op); + CNML_CALL(cnmlDestroyBaseOp(&transpose_op)); return SUCCESS; }