From 1d3754aa9189cdeec7473910ccc289eb977a5440 Mon Sep 17 00:00:00 2001 From: Santa An <49897975+AnBaolei1984@users.noreply.github.com> Date: Mon, 21 Sep 2020 14:45:41 +0800 Subject: [PATCH] [LITE][BM] support multiclass_nms2 and fix some issues, test=develop (#4379) --- lite/kernels/bm/bridges/box_coder_op.cc | 8 ++++- lite/kernels/bm/bridges/cast_op.cc | 3 +- lite/kernels/bm/bridges/elementwise_ops.cc | 27 +++++++++++--- lite/kernels/bm/bridges/multiclass_nms_op.cc | 38 +++++++++++++++++--- lite/kernels/bm/bridges/paddle_use_bridges.h | 1 + lite/kernels/bm/bridges/yolo_box_op.cc | 10 +++--- 6 files changed, 72 insertions(+), 15 deletions(-) diff --git a/lite/kernels/bm/bridges/box_coder_op.cc b/lite/kernels/bm/bridges/box_coder_op.cc index 9ef1824a64..999ea4dca2 100644 --- a/lite/kernels/bm/bridges/box_coder_op.cc +++ b/lite/kernels/bm/bridges/box_coder_op.cc @@ -73,10 +73,16 @@ int BoxCoderConverter(void* ctx, OpLite* op, KernelBase* kernel) { if (op_info->HasAttr("variance")) { variance = op_info->GetAttr>("variance"); } + int variance_len = variance.size(); user_cpu_param_t bm_param; bm_param.op_type = USER_PADDLE_BOX_CODER; bm_param.u.box_coder_param.axis = axis; - bm_param.u.box_coder_param.variance = &variance[0]; + CHECK_LE(variance_len, 2000); + memset(bm_param.u.box_coder_param.variance, 0, 2000 * sizeof(float)); + memcpy(bm_param.u.box_coder_param.variance, + &variance[0], + variance_len * sizeof(float)); + bm_param.u.box_coder_param.variance_len = variance_len; bm_param.u.box_coder_param.code_type = (code_type == "encode_center_size") ? 0 : 1; bm_param.u.box_coder_param.normalized = box_normalized; diff --git a/lite/kernels/bm/bridges/cast_op.cc b/lite/kernels/bm/bridges/cast_op.cc index 42c0751b92..45cc90c201 100644 --- a/lite/kernels/bm/bridges/cast_op.cc +++ b/lite/kernels/bm/bridges/cast_op.cc @@ -32,7 +32,8 @@ bool CvtDtype(int dtype, int* ptype) { *ptype = DTYPE_INT16; break; case 2: - *ptype = DTYPE_FP32; + case 3: + *ptype = DTYPE_INT32; break; case 5: *ptype = DTYPE_FP32; diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc index 715874d418..9124821b6e 100644 --- a/lite/kernels/bm/bridges/elementwise_ops.cc +++ b/lite/kernels/bm/bridges/elementwise_ops.cc @@ -127,7 +127,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { const float* y_data = const_cast(y->mutable_data()); const float* x_data = const_cast(x->mutable_data()); auto unique_op_name = lite::subgraph::bm::UniqueName("expand_ndims"); - std::vector i_expand_shape_data(3); + std::vector i_expand_shape_data; if (x_is_const && y_is_const) { float* cpu_data = compute_elementwise_both_const(op); bm_add_const_tensor(graph->GetCompilerHandle(), @@ -157,12 +157,31 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { static_cast(unique_op_name.c_str())); name[1] = static_cast(unique_op_name.c_str()); dim[1] = 3; - i_expand_shape_data[0] = i_y_shape_data[0]; - i_expand_shape_data[1] = 1; - i_expand_shape_data[2] = 1; + i_expand_shape_data.push_back(i_y_shape_data[0]); + i_expand_shape_data.push_back(1); + i_expand_shape_data.push_back(1); shape[1] = &i_expand_shape_data[0]; y_data = nullptr; } + } else { + if (dim[1] < dim[0]) { + for (size_t i = 0; i < dim[1]; i++) { + i_expand_shape_data.push_back(i_y_shape_data[i]); + } + for (size_t i = dim[1]; i < dim[0]; i++) { + i_expand_shape_data.push_back(1); + } + add_reshape_layer_v2(graph->GetCompilerHandle(), + name[1], + shape[1], + dim[1], + static_cast(unique_op_name.c_str()), + const_cast(&i_expand_shape_data[0]), + i_expand_shape_data.size()); + dim[1] = dim[0]; + shape[1] = &i_expand_shape_data[0]; + name[1] = static_cast(unique_op_name.c_str()); + } } add_binary_layer_v2(graph->GetCompilerHandle(), name[0], diff --git a/lite/kernels/bm/bridges/multiclass_nms_op.cc b/lite/kernels/bm/bridges/multiclass_nms_op.cc index fb7d656dd2..6270dc9a30 100644 --- a/lite/kernels/bm/bridges/multiclass_nms_op.cc +++ b/lite/kernels/bm/bridges/multiclass_nms_op.cc @@ -51,7 +51,7 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto score_threshold = op_info->GetAttr("score_threshold"); auto nms_threshold = op_info->GetAttr("nms_threshold"); auto nms_eta = op_info->GetAttr("nms_eta"); - bool normalized; + bool normalized = false; if (op_info->HasAttr("normalized")) { normalized = op_info->GetAttr("normalized"); } @@ -97,12 +97,39 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) { in_dim[1] = score_dims.size(); in_name[0] = static_cast(boxes_var_name.c_str()); in_name[1] = static_cast(score_var_name.c_str()); - int32_t* out_shape[1]; - int32_t out_dim[1]; - const char* out_name[1]; + int32_t* out_shape[2]; + int32_t out_dim[2]; + const char* out_name[2]; out_shape[0] = &i_out_shape_data[0]; out_dim[0] = out_dims.size(); out_name[0] = static_cast(out_var_name.c_str()); + + std::vector vec_index_dim(score_dims.size()); + std::vector i_out_index_shape_data(score_dims.size()); + std::string out_index_name = ""; + if (op_type == "multiclass_nms2") { + output_num = 2; + out_index_name = op_info->Output("Index").front(); + auto out_index = scope->FindVar(out_index_name)->GetMutable(); + if (3 == score_dims.size()) { + vec_index_dim[0] = score_dims[0]; + vec_index_dim[1] = keep_top_k; + vec_index_dim[2] = 1; + } else { + vec_index_dim[0] = keep_top_k; + vec_index_dim[1] = 1; + } + DDimLite index_dims(vec_index_dim); + out_index->Resize(index_dims); + out_index->mutable_data(); + for (size_t i = 0; i < index_dims.size(); i++) { + i_out_index_shape_data[i] = static_cast(index_dims[i]); + } + out_shape[1] = &i_out_index_shape_data[0]; + out_dim[1] = index_dims.size(); + out_name[1] = static_cast(out_index_name.c_str()); + } + add_user_cpu_layer(graph->GetCompilerHandle(), input_num, in_shape, @@ -126,3 +153,6 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) { REGISTER_SUBGRAPH_BRIDGE(multiclass_nms, kBM, paddle::lite::subgraph::bm::MultiClassNMSConverter); +REGISTER_SUBGRAPH_BRIDGE(multiclass_nms2, + kBM, + paddle::lite::subgraph::bm::MultiClassNMSConverter); diff --git a/lite/kernels/bm/bridges/paddle_use_bridges.h b/lite/kernels/bm/bridges/paddle_use_bridges.h index b9b575c6df..1891e13e43 100644 --- a/lite/kernels/bm/bridges/paddle_use_bridges.h +++ b/lite/kernels/bm/bridges/paddle_use_bridges.h @@ -39,6 +39,7 @@ USE_SUBGRAPH_BRIDGE(norm, kBM); USE_SUBGRAPH_BRIDGE(prior_box, kBM); USE_SUBGRAPH_BRIDGE(box_coder, kBM); USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM); +USE_SUBGRAPH_BRIDGE(multiclass_nms2, kBM); USE_SUBGRAPH_BRIDGE(nearest_interp, kBM); USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM); USE_SUBGRAPH_BRIDGE(yolo_box, kBM); diff --git a/lite/kernels/bm/bridges/yolo_box_op.cc b/lite/kernels/bm/bridges/yolo_box_op.cc index a5ea07f5fd..c1f8fa100f 100644 --- a/lite/kernels/bm/bridges/yolo_box_op.cc +++ b/lite/kernels/bm/bridges/yolo_box_op.cc @@ -67,17 +67,17 @@ int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto downsample_ratio = op_info->GetAttr("downsample_ratio"); auto conf_thresh = op_info->GetAttr("conf_thresh"); auto anchors = op_info->GetAttr>("anchors"); - int* anchors_buffer = static_cast(malloc(sizeof(int) * anchors.size())); - CHECK(anchors_buffer != nullptr); - memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size()); + CHECK_LE(anchors.size(), 2000); user_cpu_param_t bm_param; bm_param.op_type = USER_PADDLE_YOLO_BOX; bm_param.u.yolo_box_param.class_num = class_num; bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio; bm_param.u.yolo_box_param.conf_thresh = conf_thresh; - bm_param.u.yolo_box_param.anchors = anchors_buffer; + memset(bm_param.u.yolo_box_param.anchors, 0, 2000 * sizeof(int)); + memcpy(bm_param.u.yolo_box_param.anchors, + &anchors[0], + anchors.size() * sizeof(int)); bm_param.u.yolo_box_param.anchors_size = anchors.size(); - memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size()); int32_t input_num = 2; int32_t output_num = 2; int32_t* in_shape[2]; -- GitLab