Unverified commit 91de3b45, authored by jackzhang235, committed by GitHub

Merge branch 'develop' into add_dropout

@@ -316,11 +316,9 @@ void Predictor::Build(const cpp::ProgramDesc &desc,
}
}
}
#ifndef LITE_WITH_MLU
if (is_quantized_model) {
inner_places.emplace_back(Place{TARGET(kARM), PRECISION(kInt8)});
}
#endif
Program program(desc, scope_, inner_places);
......
@@ -60,8 +60,19 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
CHECK(0) << "Unsupport cast type";
}
cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
auto v_places = graph->valid_places();
for (auto it = v_places.begin(); it != v_places.end();) {
if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
it->target != TARGET(kX86)) {
it = v_places.erase(it);
} else {
++it;
}
}
// create kernels
auto kernels = cast_op->CreateKernels(graph->valid_places());
auto kernels = cast_op->CreateKernels(v_places);
std::vector<std::unique_ptr<KernelBase>> selected_kernels;
bool is_found = false;
for (auto& kernel : kernels) {
@@ -150,8 +161,18 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
auto v_places = graph->valid_places();
for (auto it = v_places.begin(); it != v_places.end();) {
if (it->target != TARGET(kMLU) && it->target != TARGET(kHost) &&
it->target != TARGET(kX86)) {
it = v_places.erase(it);
} else {
++it;
}
}
// create kernels
auto kernels = cast_op->CreateKernels(graph->valid_places());
auto kernels = cast_op->CreateKernels(v_places);
std::vector<std::unique_ptr<KernelBase>> selected_kernels;
bool is_found = false;
for (auto& kernel : kernels) {
......
@@ -369,6 +369,7 @@ void MulticlassNmsCompute::Run() {
}
} else {
outs->Resize({static_cast<int64_t>(num_kept), out_dim});
(void)outs->mutable_data<float>();
int offset = 0;
int* oindices = nullptr;
for (int i = 0; i < n; ++i) {
......
@@ -3,7 +3,7 @@ if(NOT LITE_WITH_MLU)
endif()
lite_cc_library(subgraph_bridge_utility_mlu SRCS utility.cc DEPS ${mlu_builder_libs} tensor)
lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs})
lite_cc_library(subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS ${mlu_builder_libs} subgraph_bridge_utility_mlu)
lite_cc_library(subgraph_bridge_graph_mlu SRCS graph.cc DEPS subgraph_bridge_utility_mlu subgraph_bridge_tensor_mlu)
set(mlu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_mlu subgraph_bridge_graph_mlu)
@@ -49,6 +49,6 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer targe
lite_cc_test(test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
#lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
@@ -60,6 +60,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_tensor->mlu_tensor()));
}
graph->FuseOp(activation_op);
CNML_CALL(cnmlDestroyBaseOp(&activation_op));
return SUCCESS;
}
......
@@ -81,6 +81,8 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
graph->BindConstData(mean_var_name, mean);
graph->FuseOp(bn_op);
CNML_CALL(cnmlDestroyBaseOp(&bn_op));
return SUCCESS;
}
......
@@ -60,6 +60,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
&outputs,
1));
graph->FuseOp(concat_op);
CNML_CALL(cnmlDestroyBaseOp(&concat_op));
return SUCCESS;
}
......
@@ -278,6 +278,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
graph->BindConstData(filter_var_name, filter);
graph->FuseOp(conv_op);
CNML_CALL(cnmlDestroyBaseOp(&conv_op));
return REBUILD_WHEN_SHAPE_CHANGED;
}
......
@@ -117,6 +117,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
graph->FuseOp(elementwise_op);
CNML_CALL(cnmlDestroyBaseOp(&elementwise_op));
cnmlBaseOp_t act_op;
if (op_type == "fusion_elementwise_add_activation") {
auto mid_tensor = graph->GetNode(out_var_name + "_mid");
@@ -127,6 +128,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
mid_tensor->mlu_tensor(),
output_tensor->mlu_tensor()));
graph->FuseOp(act_op);
CNML_CALL(cnmlDestroyBaseOp(&act_op));
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
......
@@ -160,6 +160,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
1 / *min_element(weight_scale.begin(), weight_scale.end()));
graph->FuseOp(fc_op);
CNML_CALL(cnmlDestroyBaseOp(&fc_op));
return REBUILD_WHEN_SHAPE_CHANGED;
}
......
@@ -49,9 +49,6 @@ class Graph {
~Graph() {
FreeConstData();
CNML_CALL(cnmlDestroyFusionOp(&fusion_op_));
for (auto op : ops_) {
CNML_CALL(cnmlDestroyBaseOp(&op));
}
#if PRINT_HW_TIME
CNRT_CALL(cnrtDestroyNotifier(&notifier_start_));
CNRT_CALL(cnrtDestroyNotifier(&notifier_end_));
@@ -234,7 +231,6 @@ class Graph {
std::vector<void*> output_addrs_;
std::vector<std::shared_ptr<MLUTensor>> input_tensors_;
std::vector<std::shared_ptr<MLUTensor>> output_tensors_;
std::vector<cnmlBaseOp_t> ops_;
cnmlFusionOp_t fusion_op_;
std::vector<void*> const_data_storage_;
#if PRINT_HW_TIME
......
@@ -85,6 +85,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
nn_param));
CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param));
graph->FuseOp(interp_op);
CNML_CALL(cnmlDestroyBaseOp(&interp_op));
return SUCCESS;
}
......
@@ -121,6 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_tensor->mlu_tensor()));
CNML_CALL(cnmlDestroyPoolOpParam(&pool_param));
graph->FuseOp(pool_op);
CNML_CALL(cnmlDestroyBaseOp(&pool_op));
return SUCCESS;
}
......
@@ -61,6 +61,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
alpha_tensor->mlu_tensor(),
beta_tensor->mlu_tensor()));
graph->FuseOp(scale_op);
CNML_CALL(cnmlDestroyBaseOp(&scale_op));
return SUCCESS;
}
......
@@ -55,6 +55,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
graph->GetNode(x_var_name)->mlu_tensor(),
output_tensor->mlu_tensor()));
graph->FuseOp(softmax_op);
CNML_CALL(cnmlDestroyBaseOp(&softmax_op));
return SUCCESS;
}
......
@@ -89,8 +89,9 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
}
graph.Compile(CNML_MLU270, 1);
graph.Compute(forward_param, queue_);
CNRT_CALL(cnrtSyncQueue(queue_));
for (auto& output_name : output_var_names) {
auto output_tensor = scope->FindMutableTensor(output_name);
Tensor temp_out;
......
@@ -61,7 +61,7 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(graph->HasNode(x_var_name));
auto input_tensor = graph->GetNode(x_var_name);
cnmlBaseOp_t transpose_op_{nullptr};
cnmlBaseOp_t transpose_op{nullptr};
cnmlNdTransposeOpParam_t transpose_param{nullptr};
@@ -69,12 +69,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
&transpose_param, axis_nhwc.data(), axis_nhwc.size()));
// Use cnmlCreatexxxOpForward to create op.
CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op_,
CNML_CALL(cnmlCreateNdTransposeProOp(&transpose_op,
input_tensor->mlu_tensor(),
output_tensor->mlu_tensor(),
transpose_param));
graph->FuseOp(transpose_op_);
graph->FuseOp(transpose_op);
CNML_CALL(cnmlDestroyBaseOp(&transpose_op));
return SUCCESS;
}
......