未验证 提交 23d2f921 编写于 作者: S Santa An 提交者: GitHub

[LITE][BM] support multiclass_nms2 and fix some issues, test=develop (#4422)

* [LITE][BM] fix input shape order changed issue,test=develop (#4407)

* [LITE][BM] support multiclass_nms2 and fix some issues, test=develop (#4379)
上级 2b310478
......@@ -23,7 +23,7 @@ int TargetWrapperBM::device_id_ = 0;
std::map<int, void*> TargetWrapperBM::bm_hds_;
size_t TargetWrapperBM::num_devices() {
int count = 0;
int count = 1;
bm_status_t ret = bm_dev_getcount(&count);
CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: "
<< static_cast<int>(ret);
......
......@@ -73,10 +73,16 @@ int BoxCoderConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (op_info->HasAttr("variance")) {
variance = op_info->GetAttr<std::vector<float>>("variance");
}
int variance_len = variance.size();
user_cpu_param_t bm_param;
bm_param.op_type = USER_PADDLE_BOX_CODER;
bm_param.u.box_coder_param.axis = axis;
bm_param.u.box_coder_param.variance = &variance[0];
CHECK_LE(variance_len, 2000);
memset(bm_param.u.box_coder_param.variance, 0, 2000 * sizeof(float));
memcpy(bm_param.u.box_coder_param.variance,
&variance[0],
variance_len * sizeof(float));
bm_param.u.box_coder_param.variance_len = variance_len;
bm_param.u.box_coder_param.code_type =
(code_type == "encode_center_size") ? 0 : 1;
bm_param.u.box_coder_param.normalized = box_normalized;
......
......@@ -32,7 +32,8 @@ bool CvtDtype(int dtype, int* ptype) {
*ptype = DTYPE_INT16;
break;
case 2:
*ptype = DTYPE_FP32;
case 3:
*ptype = DTYPE_INT32;
break;
case 5:
*ptype = DTYPE_FP32;
......
......@@ -127,7 +127,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const float* y_data = const_cast<const float*>(y->mutable_data<float>());
const float* x_data = const_cast<const float*>(x->mutable_data<float>());
auto unique_op_name = lite::subgraph::bm::UniqueName("expand_ndims");
std::vector<int32_t> i_expand_shape_data(3);
std::vector<int32_t> i_expand_shape_data;
if (x_is_const && y_is_const) {
float* cpu_data = compute_elementwise_both_const(op);
bm_add_const_tensor(graph->GetCompilerHandle(),
......@@ -157,12 +157,31 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
static_cast<const char*>(unique_op_name.c_str()));
name[1] = static_cast<const char*>(unique_op_name.c_str());
dim[1] = 3;
i_expand_shape_data[0] = i_y_shape_data[0];
i_expand_shape_data[1] = 1;
i_expand_shape_data[2] = 1;
i_expand_shape_data.push_back(i_y_shape_data[0]);
i_expand_shape_data.push_back(1);
i_expand_shape_data.push_back(1);
shape[1] = &i_expand_shape_data[0];
y_data = nullptr;
}
} else {
if (dim[1] < dim[0]) {
for (size_t i = 0; i < dim[1]; i++) {
i_expand_shape_data.push_back(i_y_shape_data[i]);
}
for (size_t i = dim[1]; i < dim[0]; i++) {
i_expand_shape_data.push_back(1);
}
add_reshape_layer_v2(graph->GetCompilerHandle(),
name[1],
shape[1],
dim[1],
static_cast<const char*>(unique_op_name.c_str()),
const_cast<const int*>(&i_expand_shape_data[0]),
i_expand_shape_data.size());
dim[1] = dim[0];
shape[1] = &i_expand_shape_data[0];
name[1] = static_cast<const char*>(unique_op_name.c_str());
}
}
add_binary_layer_v2(graph->GetCompilerHandle(),
name[0],
......
......@@ -51,7 +51,7 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto score_threshold = op_info->GetAttr<float>("score_threshold");
auto nms_threshold = op_info->GetAttr<float>("nms_threshold");
auto nms_eta = op_info->GetAttr<float>("nms_eta");
bool normalized;
bool normalized = false;
if (op_info->HasAttr("normalized")) {
normalized = op_info->GetAttr<bool>("normalized");
}
......@@ -97,12 +97,39 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
in_dim[1] = score_dims.size();
in_name[0] = static_cast<const char*>(boxes_var_name.c_str());
in_name[1] = static_cast<const char*>(score_var_name.c_str());
int32_t* out_shape[1];
int32_t out_dim[1];
const char* out_name[1];
int32_t* out_shape[2];
int32_t out_dim[2];
const char* out_name[2];
out_shape[0] = &i_out_shape_data[0];
out_dim[0] = out_dims.size();
out_name[0] = static_cast<const char*>(out_var_name.c_str());
std::vector<int64_t> vec_index_dim(score_dims.size());
std::vector<int32_t> i_out_index_shape_data(score_dims.size());
std::string out_index_name = "";
if (op_type == "multiclass_nms2") {
output_num = 2;
out_index_name = op_info->Output("Index").front();
auto out_index = scope->FindVar(out_index_name)->GetMutable<lite::Tensor>();
if (3 == score_dims.size()) {
vec_index_dim[0] = score_dims[0];
vec_index_dim[1] = keep_top_k;
vec_index_dim[2] = 1;
} else {
vec_index_dim[0] = keep_top_k;
vec_index_dim[1] = 1;
}
DDimLite index_dims(vec_index_dim);
out_index->Resize(index_dims);
out_index->mutable_data<float>();
for (size_t i = 0; i < index_dims.size(); i++) {
i_out_index_shape_data[i] = static_cast<int32_t>(index_dims[i]);
}
out_shape[1] = &i_out_index_shape_data[0];
out_dim[1] = index_dims.size();
out_name[1] = static_cast<const char*>(out_index_name.c_str());
}
add_user_cpu_layer(graph->GetCompilerHandle(),
input_num,
in_shape,
......@@ -126,3 +153,6 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Register the BM subgraph bridge for the `multiclass_nms` op.
REGISTER_SUBGRAPH_BRIDGE(multiclass_nms,
kBM,
paddle::lite::subgraph::bm::MultiClassNMSConverter);
// `multiclass_nms2` reuses the same converter: the converter checks
// op_type == "multiclass_nms2" and, only then, wires up the extra
// "Index" output tensor as a second CPU-layer output.
REGISTER_SUBGRAPH_BRIDGE(multiclass_nms2,
kBM,
paddle::lite::subgraph::bm::MultiClassNMSConverter);
......@@ -39,6 +39,7 @@ USE_SUBGRAPH_BRIDGE(norm, kBM);
// Ops bridged to the BM (Bitmain) backend. NOTE(review): presumably these
// USE_* declarations force the corresponding bridge registrations to be
// linked into the final binary — confirm against the USE_SUBGRAPH_BRIDGE
// macro definition.
USE_SUBGRAPH_BRIDGE(prior_box, kBM);
USE_SUBGRAPH_BRIDGE(box_coder, kBM);
USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM);
// Added alongside the multiclass_nms2 registration in the NMS bridge.
USE_SUBGRAPH_BRIDGE(multiclass_nms2, kBM);
USE_SUBGRAPH_BRIDGE(nearest_interp, kBM);
USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM);
USE_SUBGRAPH_BRIDGE(yolo_box, kBM);
......
......@@ -67,17 +67,17 @@ int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto downsample_ratio = op_info->GetAttr<int>("downsample_ratio");
auto conf_thresh = op_info->GetAttr<float>("conf_thresh");
auto anchors = op_info->GetAttr<std::vector<int>>("anchors");
int* anchors_buffer = static_cast<int*>(malloc(sizeof(int) * anchors.size()));
CHECK(anchors_buffer != nullptr);
memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
CHECK_LE(anchors.size(), 2000);
user_cpu_param_t bm_param;
bm_param.op_type = USER_PADDLE_YOLO_BOX;
bm_param.u.yolo_box_param.class_num = class_num;
bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio;
bm_param.u.yolo_box_param.conf_thresh = conf_thresh;
bm_param.u.yolo_box_param.anchors = anchors_buffer;
memset(bm_param.u.yolo_box_param.anchors, 0, 2000 * sizeof(int));
memcpy(bm_param.u.yolo_box_param.anchors,
&anchors[0],
anchors.size() * sizeof(int));
bm_param.u.yolo_box_param.anchors_size = anchors.size();
memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
int32_t input_num = 2;
int32_t output_num = 2;
int32_t* in_shape[2];
......
......@@ -66,9 +66,9 @@ bool SubgraphEngine::BuildDeviceProgram() {
graph.GetCompilerHandle(), const_cast<char*>(unique_net_name.c_str()), 1);
void* bmodel_data = nullptr;
unsigned int data_size = 0;
bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
graph.UnlockCompilerMutex();
bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
bmrt_hd_ = bmrt_create(bm_hd_);
if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
return false;
......@@ -79,14 +79,14 @@ bool SubgraphEngine::BuildDeviceProgram() {
// input
device_inputs_.resize(input_names_.size());
for (size_t i = 0; i < input_names_.size(); i++) {
origin_itensors_[i] =
auto origin_itensor =
exec_scope_->FindMutableTensor(net_info_->input_names[i]);
CHECK(origin_itensors_[i]);
CHECK(origin_itensor);
bm_device_mem_t* p_mem =
static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
CHECK(p_mem != nullptr);
CHECK_EQ(bm_malloc_device_byte(
bm_hd_, p_mem, origin_itensors_[i]->memory_size()),
CHECK_EQ(
bm_malloc_device_byte(bm_hd_, p_mem, origin_itensor->memory_size()),
BM_SUCCESS);
bmrt_tensor_with_device(&device_inputs_[i],
*p_mem,
......@@ -124,9 +124,11 @@ bool SubgraphEngine::BuildDeviceProgram() {
bool SubgraphEngine::LaunchDeviceProgram() {
for (size_t i = 0; i < device_inputs_.size(); i++) {
auto origin_itensor =
exec_scope_->FindMutableTensor(net_info_->input_names[i]);
bm_memcpy_s2d(bm_hd_,
device_inputs_[i].device_mem,
const_cast<void*>(origin_itensors_[i]->raw_data()));
const_cast<void*>(origin_itensor->raw_data()));
}
bmrt_launch_tensor_ex(bmrt_hd_,
net_names_[0],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册