Unverified · Commit 91de3b45 authored by J jackzhang235, committed by GitHub

Merge branch 'develop' into add_dropout

@@ -316,11 +316,9 @@ void Predictor::Build(const cpp::ProgramDesc &desc,
       }
     }
   }
-#ifndef LITE_WITH_MLU
   if (is_quantized_model) {
     inner_places.emplace_back(Place{TARGET(kARM), PRECISION(kInt8)});
   }
-#endif
   Program program(desc, scope_, inner_places);
...
@@ -60,8 +60,19 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
     CHECK(0) << "Unsupport cast type";
   }
   cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
+  auto v_places = graph->valid_places();
+  for (auto it = v_places.begin(); it != v_places.end();) {
+    if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
+        it->target != TARGET(kX86)) {
+      it = v_places.erase(it);
+    } else {
+      ++it;
+    }
+  }
   // create kernels
-  auto kernels = cast_op->CreateKernels(graph->valid_places());
+  auto kernels = cast_op->CreateKernels(v_places);
   std::vector<std::unique_ptr<KernelBase>> selected_kernels;
   bool is_found = false;
   for (auto& kernel : kernels) {
@@ -150,8 +161,18 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
   cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
+  auto v_places = graph->valid_places();
+  for (auto it = v_places.begin(); it != v_places.end();) {
+    if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
+        it->target != TARGET(kX86)) {
+      it = v_places.erase(it);
+    } else {
+      ++it;
+    }
+  }
   // create kernels
-  auto kernels = cast_op->CreateKernels(graph->valid_places());
+  auto kernels = cast_op->CreateKernels(v_places);
   std::vector<std::unique_ptr<KernelBase>> selected_kernels;
   bool is_found = false;
   for (auto& kernel : kernels) {
...
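Both InsertCastBefore and InsertCastAfter now trim graph->valid_places() down to the kMLU, kHost and kX86 targets before handing the list to CreateKernels, so the inserted cast ops can only pick kernels on those targets. Below is a minimal sketch of the same filtering written with the erase-remove idiom; it is illustrative only, not part of this patch, and reuses the Place, TARGET(...), graph and cast_op identifiers from the hunks above, assuming <algorithm> is included.

// Sketch only: equivalent to the explicit erase loop added in the patch.
auto v_places = graph->valid_places();
v_places.erase(std::remove_if(v_places.begin(),
                              v_places.end(),
                              [](const Place& place) {
                                return place.target != TARGET(kMLU) &&
                                       place.target != TARGET(kHost) &&
                                       place.target != TARGET(kX86);
                              }),
               v_places.end());
auto kernels = cast_op->CreateKernels(v_places);  // only MLU/Host/X86 kernels are created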
@@ -369,6 +369,7 @@ void MulticlassNmsCompute::Run() {
     }
   } else {
     outs->Resize({static_cast<int64_t>(num_kept), out_dim});
+    (void)outs->mutable_data<float>();
     int offset = 0;
     int* oindices = nullptr;
     for (int i = 0; i < n; ++i) {
...
@@ -3,7 +3,7 @@ if(NOT LITE_WITH_MLU)
 endif()
 lite_cc_library(subgraph_bridge_utility_mlu SRCS utility.cc DEPS ${mlu_builder_libs} tensor)
-lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs})
+lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs} subgraph_bridge_utility_mlu)
 lite_cc_library(subgraph_bridge_graph_mlu SRCS graph.cc DEPS subgraph_bridge_utility_mlu subgraph_bridge_tensor_mlu)
 set(mlu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_mlu subgraph_bridge_graph_mlu)
@@ -49,6 +49,6 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer targe
 lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
-lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
+#lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
@@ -60,6 +60,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                         output_tensor->mlu_tensor()));
   }
   graph->FuseOp(activation_op);
+  CNML_CALL(cnmlDestroyBaseOp(&activation_op));
   return SUCCESS;
 }
...
@@ -81,6 +81,8 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   graph->BindConstData(mean_var_name, mean);
   graph->FuseOp(bn_op);
+  CNML_CALL(cnmlDestroyBaseOp(&bn_op));
   return SUCCESS;
 }
...
@@ -60,6 +60,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                  &outputs,
                                  1));
   graph->FuseOp(concat_op);
+  CNML_CALL(cnmlDestroyBaseOp(&concat_op));
   return SUCCESS;
 }
...
@@ -278,6 +278,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }
   graph->BindConstData(filter_var_name, filter);
   graph->FuseOp(conv_op);
+  CNML_CALL(cnmlDestroyBaseOp(&conv_op));
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -117,6 +117,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }
   graph->FuseOp(elementwise_op);
+  CNML_CALL(cnmlDestroyBaseOp(&elementwise_op));
   cnmlBaseOp_t act_op;
   if (op_type == "fusion_elementwise_add_activation") {
     auto mid_tensor = graph->GetNode(out_var_name + "_mid");
@@ -127,6 +128,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                               mid_tensor->mlu_tensor(),
                               output_tensor->mlu_tensor()));
     graph->FuseOp(act_op);
+    CNML_CALL(cnmlDestroyBaseOp(&act_op));
   }
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -160,6 +160,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       1 / *min_element(weight_scale.begin(), weight_scale.end()));
   graph->FuseOp(fc_op);
+  CNML_CALL(cnmlDestroyBaseOp(&fc_op));
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
...
@@ -49,9 +49,6 @@ class Graph {
   ~Graph() {
     FreeConstData();
     CNML_CALL(cnmlDestroyFusionOp(&fusion_op_));
-    for (auto op : ops_) {
-      CNML_CALL(cnmlDestroyBaseOp(&op));
-    }
 #if PRINT_HW_TIME
     CNRT_CALL(cnrtDestroyNotifier(&notifier_start_));
     CNRT_CALL(cnrtDestroyNotifier(&notifier_end_));
@@ -234,7 +231,6 @@ class Graph {
   std::vector<void*> output_addrs_;
   std::vector<std::shared_ptr<MLUTensor>> input_tensors_;
   std::vector<std::shared_ptr<MLUTensor>> output_tensors_;
-  std::vector<cnmlBaseOp_t> ops_;
   cnmlFusionOp_t fusion_op_;
   std::vector<void*> const_data_storage_;
 #if PRINT_HW_TIME
...
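Together with the converter hunks above and below, this graph.h change moves base-op cleanup out of Graph: the ops_ vector and the destructor loop that destroyed every stored op are gone, and each converter now destroys its local handle immediately after FuseOp. A minimal sketch of that per-converter lifetime follows; CreateTheOp is a hypothetical stand-in for whichever cnmlCreate*Op call a given converter already makes, and everything else a real converter does is omitted.

// Sketch of the base-op lifetime the converters now follow (illustrative only).
cnmlBaseOp_t base_op{nullptr};
CNML_CALL(CreateTheOp(&base_op));         // hypothetical create call, as in each converter
graph->FuseOp(base_op);                   // the fusion op records what it needs from base_op
CNML_CALL(cnmlDestroyBaseOp(&base_op));   // release the local handle right away
return SUCCESS;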
@@ -85,6 +85,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                              nn_param));
   CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param));
   graph->FuseOp(interp_op);
+  CNML_CALL(cnmlDestroyBaseOp(&interp_op));
   return SUCCESS;
 }
...
@@ -121,6 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                 output_tensor->mlu_tensor()));
   CNML_CALL(cnmlDestroyPoolOpParam(&pool_param));
   graph->FuseOp(pool_op);
+  CNML_CALL(cnmlDestroyBaseOp(&pool_op));
   return SUCCESS;
 }
...
@@ -61,6 +61,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                  alpha_tensor->mlu_tensor(),
                                  beta_tensor->mlu_tensor()));
   graph->FuseOp(scale_op);
+  CNML_CALL(cnmlDestroyBaseOp(&scale_op));
   return SUCCESS;
 }
...
@@ -55,6 +55,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                    graph->GetNode(x_var_name)->mlu_tensor(),
                                    output_tensor->mlu_tensor()));
   graph->FuseOp(softmax_op);
+  CNML_CALL(cnmlDestroyBaseOp(&softmax_op));
   return SUCCESS;
 }
...
@@ -89,8 +89,9 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
   }
   graph.Compile(CNML_MLU270, 1);
   graph.Compute(forward_param, queue_);
+  CNRT_CALL(cnrtSyncQueue(queue_));
   for (auto& output_name : output_var_names) {
     auto output_tensor = scope->FindMutableTensor(output_name);
     Tensor temp_out;
...
@@ -61,7 +61,7 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(graph->HasNode(x_var_name));
   auto input_tensor = graph->GetNode(x_var_name);
-  cnmlBaseOp_t transpose_op_{nullptr};
+  cnmlBaseOp_t transpose_op{nullptr};
   cnmlNdTransposeOpParam_t transpose_param{nullptr};
@@ -69,12 +69,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       &transpose_param, axis_nhwc.data(), axis_nhwc.size()));
   // Use cnmlCreatexxxOpForward to create op.
-  CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op_,
+  CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op,
                                        input_tensor->mlu_tensor(),
                                        output_tensor->mlu_tensor(),
                                        transpose_param));
-  graph->FuseOp(transpose_op_);
+  graph->FuseOp(transpose_op);
+  CNML_CALL(cnmlDestroyBaseOp(&transpose_op));
   return SUCCESS;
 }
...