Unverified commit 1d3754aa, authored by Santa An, committed by GitHub

[LITE][BM] support multiclass_nms2 and fix some issues, test=develop (#4379)

Parent 26c0ecc8
@@ -73,10 +73,16 @@ int BoxCoderConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   if (op_info->HasAttr("variance")) {
     variance = op_info->GetAttr<std::vector<float>>("variance");
   }
+  int variance_len = variance.size();
   user_cpu_param_t bm_param;
   bm_param.op_type = USER_PADDLE_BOX_CODER;
   bm_param.u.box_coder_param.axis = axis;
-  bm_param.u.box_coder_param.variance = &variance[0];
+  CHECK_LE(variance_len, 2000);
+  memset(bm_param.u.box_coder_param.variance, 0, 2000 * sizeof(float));
+  memcpy(bm_param.u.box_coder_param.variance,
+         &variance[0],
+         variance_len * sizeof(float));
+  bm_param.u.box_coder_param.variance_len = variance_len;
   bm_param.u.box_coder_param.code_type =
       (code_type == "encode_center_size") ? 0 : 1;
   bm_param.u.box_coder_param.normalized = box_normalized;
...
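The old code stored `&variance[0]`, a pointer into a stack-local `std::vector` that dies when the converter returns, inside a param struct handed off to the BM compiler. The fix copies the values into a fixed-size array embedded in `user_cpu_param_t`, so the struct stays self-contained. A minimal sketch of the pattern, where `box_coder_param_t` is a hypothetical stand-in assuming the real union member embeds a `float[2000]`:

```cpp
// Sketch only: box_coder_param_t is a stand-in for the BM union member,
// assumed to embed fixed storage rather than hold a raw pointer.
#include <cassert>
#include <cstring>
#include <vector>

struct box_coder_param_t {
  float variance[2000];  // embedded storage, safe to copy/serialize
  int variance_len;
};

// Copies a variable-length attribute into the fixed buffer, zero-filling
// the tail so the struct has deterministic contents.
void FillVariance(const std::vector<float>& variance, box_coder_param_t* p) {
  assert(variance.size() <= 2000);  // mirrors CHECK_LE in the converter
  std::memset(p->variance, 0, sizeof(p->variance));
  std::memcpy(p->variance, variance.data(), variance.size() * sizeof(float));
  p->variance_len = static_cast<int>(variance.size());
}

int main() {
  std::vector<float> variance = {0.1f, 0.1f, 0.2f, 0.2f};
  box_coder_param_t param;
  FillVariance(variance, &param);
  // param stays valid even after `variance` is destroyed, unlike the
  // old code, which kept a pointer to the vector's storage.
  assert(param.variance_len == 4 && param.variance[3] == 0.2f);
  return 0;
}
```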
@@ -32,7 +32,8 @@ bool CvtDtype(int dtype, int* ptype) {
     *ptype = DTYPE_INT16;
     break;
   case 2:
-    *ptype = DTYPE_FP32;
+  case 3:
+    *ptype = DTYPE_INT32;
     break;
   case 5:
     *ptype = DTYPE_FP32;
...
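In Paddle's VarType enum, 2 is INT32 and 3 is INT64, so the old mapping of case 2 to DTYPE_FP32 was simply wrong; the fix routes both integer dtypes to DTYPE_INT32 via a switch fall-through (INT64 being narrowed on BM). A hedged reconstruction of the corrected mapping, with the DTYPE_* values as placeholders for the BM runtime's real constants:

```cpp
// Sketch: numeric cases follow Paddle's VarType enum (1 = INT16,
// 2 = INT32, 3 = INT64, 5 = FP32); DTYPE_* values are placeholders.
#include <cstdio>

enum BmDtype { DTYPE_FP32 = 0, DTYPE_INT16 = 1, DTYPE_INT32 = 2 };

bool CvtDtype(int dtype, int* ptype) {
  switch (dtype) {
    case 1:
      *ptype = DTYPE_INT16;
      break;
    case 2:  // Paddle INT32
    case 3:  // Paddle INT64, narrowed to 32-bit on BM
      *ptype = DTYPE_INT32;
      break;
    case 5:
      *ptype = DTYPE_FP32;
      break;
    default:
      return false;  // unsupported dtype
  }
  return true;
}

int main() {
  int p;
  std::printf("%d %d\n", CvtDtype(3, &p), p);  // prints "1 2": INT64 -> DTYPE_INT32
  return 0;
}
```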
@@ -127,7 +127,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   const float* y_data = const_cast<const float*>(y->mutable_data<float>());
   const float* x_data = const_cast<const float*>(x->mutable_data<float>());
   auto unique_op_name = lite::subgraph::bm::UniqueName("expand_ndims");
-  std::vector<int32_t> i_expand_shape_data(3);
+  std::vector<int32_t> i_expand_shape_data;
   if (x_is_const && y_is_const) {
     float* cpu_data = compute_elementwise_both_const(op);
     bm_add_const_tensor(graph->GetCompilerHandle(),
@@ -157,12 +157,31 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                           static_cast<const char*>(unique_op_name.c_str()));
       name[1] = static_cast<const char*>(unique_op_name.c_str());
       dim[1] = 3;
-      i_expand_shape_data[0] = i_y_shape_data[0];
-      i_expand_shape_data[1] = 1;
-      i_expand_shape_data[2] = 1;
+      i_expand_shape_data.push_back(i_y_shape_data[0]);
+      i_expand_shape_data.push_back(1);
+      i_expand_shape_data.push_back(1);
       shape[1] = &i_expand_shape_data[0];
       y_data = nullptr;
     }
+  } else {
+    if (dim[1] < dim[0]) {
+      for (size_t i = 0; i < dim[1]; i++) {
+        i_expand_shape_data.push_back(i_y_shape_data[i]);
+      }
+      for (size_t i = dim[1]; i < dim[0]; i++) {
+        i_expand_shape_data.push_back(1);
+      }
+      add_reshape_layer_v2(graph->GetCompilerHandle(),
+                           name[1],
+                           shape[1],
+                           dim[1],
+                           static_cast<const char*>(unique_op_name.c_str()),
+                           const_cast<const int*>(&i_expand_shape_data[0]),
+                           i_expand_shape_data.size());
+      dim[1] = dim[0];
+      shape[1] = &i_expand_shape_data[0];
+      name[1] = static_cast<const char*>(unique_op_name.c_str());
+    }
   }
   add_binary_layer_v2(graph->GetCompilerHandle(),
                       name[0],
...
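The new else-branch covers the case where input y has fewer dimensions than input x: y's own dims are kept and the shape is right-padded with 1s, then a reshape layer is inserted so the following binary layer can broadcast y against x with Paddle-style leading alignment. A minimal sketch of just the shape-padding step, with `ExpandShape` as an illustrative helper:

```cpp
// Sketch of the shape-padding logic above: y's dims first, then trailing
// 1s until the rank matches x. E.g. x [2,3,4,5] with y [2,3] -> [2,3,1,1].
#include <cstdio>
#include <vector>

std::vector<int32_t> ExpandShape(const std::vector<int32_t>& y_shape,
                                 size_t x_dims) {
  std::vector<int32_t> expanded(y_shape);  // keep y's own dims first
  while (expanded.size() < x_dims) {
    expanded.push_back(1);                 // then pad trailing 1s
  }
  return expanded;
}

int main() {
  auto s = ExpandShape({2, 3}, 4);
  for (int32_t d : s) std::printf("%d ", d);  // prints: 2 3 1 1
  std::printf("\n");
  return 0;
}
```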
@@ -51,7 +51,7 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto score_threshold = op_info->GetAttr<float>("score_threshold");
   auto nms_threshold = op_info->GetAttr<float>("nms_threshold");
   auto nms_eta = op_info->GetAttr<float>("nms_eta");
-  bool normalized;
+  bool normalized = false;
   if (op_info->HasAttr("normalized")) {
     normalized = op_info->GetAttr<bool>("normalized");
   }
@@ -97,12 +97,39 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   in_dim[1] = score_dims.size();
   in_name[0] = static_cast<const char*>(boxes_var_name.c_str());
   in_name[1] = static_cast<const char*>(score_var_name.c_str());
-  int32_t* out_shape[1];
-  int32_t out_dim[1];
-  const char* out_name[1];
+  int32_t* out_shape[2];
+  int32_t out_dim[2];
+  const char* out_name[2];
   out_shape[0] = &i_out_shape_data[0];
   out_dim[0] = out_dims.size();
   out_name[0] = static_cast<const char*>(out_var_name.c_str());
+  std::vector<int64_t> vec_index_dim(score_dims.size());
+  std::vector<int32_t> i_out_index_shape_data(score_dims.size());
+  std::string out_index_name = "";
+  if (op_type == "multiclass_nms2") {
+    output_num = 2;
+    out_index_name = op_info->Output("Index").front();
+    auto out_index = scope->FindVar(out_index_name)->GetMutable<lite::Tensor>();
+    if (3 == score_dims.size()) {
+      vec_index_dim[0] = score_dims[0];
+      vec_index_dim[1] = keep_top_k;
+      vec_index_dim[2] = 1;
+    } else {
+      vec_index_dim[0] = keep_top_k;
+      vec_index_dim[1] = 1;
+    }
+    DDimLite index_dims(vec_index_dim);
+    out_index->Resize(index_dims);
+    out_index->mutable_data<float>();
+    for (size_t i = 0; i < index_dims.size(); i++) {
+      i_out_index_shape_data[i] = static_cast<int32_t>(index_dims[i]);
+    }
+    out_shape[1] = &i_out_index_shape_data[0];
+    out_dim[1] = index_dims.size();
+    out_name[1] = static_cast<const char*>(out_index_name.c_str());
+  }
   add_user_cpu_layer(graph->GetCompilerHandle(),
                      input_num,
                      in_shape,
@@ -126,3 +153,6 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 REGISTER_SUBGRAPH_BRIDGE(multiclass_nms,
                          kBM,
                          paddle::lite::subgraph::bm::MultiClassNMSConverter);
+REGISTER_SUBGRAPH_BRIDGE(multiclass_nms2,
+                         kBM,
+                         paddle::lite::subgraph::bm::MultiClassNMSConverter);
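multiclass_nms2 differs from multiclass_nms only in its extra Index output (the indices of the kept boxes), so both ops are registered to the same converter, which adds the second output only when op_type is "multiclass_nms2". The index tensor is shaped [N, keep_top_k, 1] for batched 3-D scores and [keep_top_k, 1] otherwise. A small sketch of that shape rule, with `IndexDims` as an illustrative helper using plain int64_t dims instead of DDimLite:

```cpp
// Sketch of the Index-output shape rule: 3-D scores [N, C, M] yield an
// index tensor [N, keep_top_k, 1]; otherwise it is [keep_top_k, 1].
#include <cassert>
#include <vector>

std::vector<int64_t> IndexDims(const std::vector<int64_t>& score_dims,
                               int64_t keep_top_k) {
  if (score_dims.size() == 3) {
    return {score_dims[0], keep_top_k, 1};
  }
  return {keep_top_k, 1};
}

int main() {
  // Batched scores [8, 80, 1000] with keep_top_k = 100 -> [8, 100, 1].
  assert((IndexDims({8, 80, 1000}, 100) == std::vector<int64_t>{8, 100, 1}));
  // Unbatched scores -> [100, 1].
  assert((IndexDims({80, 1000}, 100) == std::vector<int64_t>{100, 1}));
  return 0;
}
```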
@@ -39,6 +39,7 @@ USE_SUBGRAPH_BRIDGE(norm, kBM);
 USE_SUBGRAPH_BRIDGE(prior_box, kBM);
 USE_SUBGRAPH_BRIDGE(box_coder, kBM);
 USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM);
+USE_SUBGRAPH_BRIDGE(multiclass_nms2, kBM);
 USE_SUBGRAPH_BRIDGE(nearest_interp, kBM);
 USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM);
 USE_SUBGRAPH_BRIDGE(yolo_box, kBM);
...
@@ -67,17 +67,17 @@ int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto downsample_ratio = op_info->GetAttr<int>("downsample_ratio");
   auto conf_thresh = op_info->GetAttr<float>("conf_thresh");
   auto anchors = op_info->GetAttr<std::vector<int>>("anchors");
-  int* anchors_buffer = static_cast<int*>(malloc(sizeof(int) * anchors.size()));
-  CHECK(anchors_buffer != nullptr);
-  memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
+  CHECK_LE(anchors.size(), 2000);
   user_cpu_param_t bm_param;
   bm_param.op_type = USER_PADDLE_YOLO_BOX;
   bm_param.u.yolo_box_param.class_num = class_num;
   bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio;
   bm_param.u.yolo_box_param.conf_thresh = conf_thresh;
-  bm_param.u.yolo_box_param.anchors = anchors_buffer;
+  memset(bm_param.u.yolo_box_param.anchors, 0, 2000 * sizeof(int));
+  memcpy(bm_param.u.yolo_box_param.anchors,
+         &anchors[0],
+         anchors.size() * sizeof(int));
   bm_param.u.yolo_box_param.anchors_size = anchors.size();
-  memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
   int32_t input_num = 2;
   int32_t output_num = 2;
   int32_t* in_shape[2];
...
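This mirrors the box_coder change and also fixes two defects: the old `anchors_buffer` was malloc'd and never freed (leaking once per converted op), and the trailing second memcpy into it was dead code. Copying into the fixed array embedded in `user_cpu_param_t` needs no ownership handoff at all. A minimal sketch under the same assumption as before, with `yolo_box_param_t` as a hypothetical stand-in embedding an `int[2000]`:

```cpp
// Sketch only: yolo_box_param_t stands in for the BM union member,
// assumed to embed int[2000] like the variance buffer above.
#include <cassert>
#include <cstring>
#include <vector>

struct yolo_box_param_t {
  int anchors[2000];
  int anchors_size;
};

void FillAnchors(const std::vector<int>& anchors, yolo_box_param_t* p) {
  assert(anchors.size() <= 2000);  // mirrors CHECK_LE in the converter
  std::memset(p->anchors, 0, sizeof(p->anchors));
  std::memcpy(p->anchors, anchors.data(), anchors.size() * sizeof(int));
  p->anchors_size = static_cast<int>(anchors.size());
}

int main() {
  yolo_box_param_t param;
  FillAnchors({10, 13, 16, 30, 33, 23}, &param);  // typical YOLO anchors
  assert(param.anchors_size == 6 && param.anchors[5] == 23);
  return 0;  // nothing to free: no heap allocation involved
}
```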