From 5046baba1c6c2253b8fd1366833fb8473efad047 Mon Sep 17 00:00:00 2001
From: Santa An <49897975+AnBaolei1984@users.noreply.github.com>
Date: Sat, 29 Feb 2020 14:07:59 +0800
Subject: [PATCH] * [LITE][BM] support yolov3, test=develop (#3035)

---
 lite/kernels/bm/bridges/CMakeLists.txt       |   4 +
 lite/kernels/bm/bridges/act_op.cc            |  37 ++++--
 lite/kernels/bm/bridges/elementwise_ops.cc   |  49 +++----
 lite/kernels/bm/bridges/interpolate_op.cc    |  95 ++++++++++++++
 lite/kernels/bm/bridges/paddle_use_bridges.h |   6 +
 lite/kernels/bm/bridges/yolo_box_op.cc       | 125 +++++++++++++++++++
 lite/kernels/bm/subgraph_compute.cc          |  13 +-
 lite/kernels/bm/subgraph_compute.h           |   1 -
 8 files changed, 282 insertions(+), 48 deletions(-)
 create mode 100644 lite/kernels/bm/bridges/interpolate_op.cc
 create mode 100644 lite/kernels/bm/bridges/yolo_box_op.cc

diff --git a/lite/kernels/bm/bridges/CMakeLists.txt b/lite/kernels/bm/bridges/CMakeLists.txt
index bd422de76c..ffe5018ba9 100644
--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -23,6 +23,8 @@ lite_cc_library(subgraph_bridge_norm_op_bm SRCS norm_op.cc DEPS ${bm_subgraph_br
 lite_cc_library(subgraph_bridge_prior_box_op_bm SRCS prior_box_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_box_coder_op_bm SRCS box_coder_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_multiclass_nms_op_bm SRCS multiclass_nms_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_interpolate_op_bm SRCS interpolate_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_yolo_box_op_bm SRCS yolo_box_op.cc DEPS ${bm_subgraph_bridge_deps})
 
 set(bm_subgraph_bridges
         subgraph_bridge_registry
@@ -44,4 +46,6 @@ set(bm_subgraph_bridges
         subgraph_bridge_prior_box_op_bm
         subgraph_bridge_box_coder_op_bm
         subgraph_bridge_multiclass_nms_op_bm
+        subgraph_bridge_interpolate_op_bm
+        subgraph_bridge_yolo_box_op_bm
         CACHE INTERNAL "bm_subgraph_bridges")
diff --git a/lite/kernels/bm/bridges/act_op.cc b/lite/kernels/bm/bridges/act_op.cc
index 0d3c4e0b83..0917431579 100644
--- a/lite/kernels/bm/bridges/act_op.cc
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <bmcompiler_if.h>
+#include <bmcompiler_op_code.h>
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/npu/bridges/registry.h"
 
@@ -46,22 +47,38 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     i_output_shape_data[i] = static_cast<int32_t>(output_shape_data[i]);
   }
   float alpha = 0.f;
+  int active_type_id = 0;
   if (op_type == "relu") {
   } else if (op_type == "leaky_relu") {
     alpha = op_info->GetAttr<float>("alpha");
+  } else if (op_type == "sqrt") {
+    active_type_id = ACTIVE_SQRT;
+  } else if (op_type == "square") {
+    active_type_id = ACTIVE_SQUARE;
   } else {
     LOG(FATAL) << "[BM] unsupport act type";
     return FAILED;
   }
-  add_relu_layer(graph->GetCompilerHandle(),
-                 const_cast<const int*>(&i_x_shape_data[0]),
-                 x_dims.size(),
-                 static_cast<const char*>(x_var_name.c_str()),
-                 const_cast<const int*>(&i_output_shape_data[0]),
-                 output_dims.size(),
-                 static_cast<const char*>(output_var_name.c_str()),
-                 alpha,
-                 -1.f);
+  if (op_type == "relu" || op_type == "leaky_relu") {
+    add_relu_layer(graph->GetCompilerHandle(),
+                   const_cast<const int*>(&i_x_shape_data[0]),
+                   x_dims.size(),
+                   static_cast<const char*>(x_var_name.c_str()),
+                   const_cast<const int*>(&i_output_shape_data[0]),
+                   output_dims.size(),
+                   static_cast<const char*>(output_var_name.c_str()),
+                   alpha,
+                   -1.f);
+  } else {
+    add_active_layer(graph->GetCompilerHandle(),
+                     const_cast<const int*>(&i_x_shape_data[0]),
+                     x_dims.size(),
+                     static_cast<const char*>(x_var_name.c_str()),
+                     const_cast<const int*>(&i_output_shape_data[0]),
+                     output_dims.size(),
+                     static_cast<const char*>(output_var_name.c_str()),
+                     active_type_id);
+  }
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
@@ -75,3 +92,5 @@ REGISTER_SUBGRAPH_BRIDGE(relu, kBM, paddle::lite::subgraph::bm::ActConverter);
 REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
                          kBM,
                          paddle::lite::subgraph::bm::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(sqrt, kBM, paddle::lite::subgraph::bm::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(square, kBM, paddle::lite::subgraph::bm::ActConverter);
diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc
index 2fdbfd8c3f..3006a8b6fd 100644
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -71,18 +71,14 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }
   auto axis = op_info->GetAttr<int>("axis");
   int op_code{-1};
-  int eltwise_if_code{-1};
-  float coeff[2] = {1.f, 1.f};
   if (op_type == "elementwise_mul") {
     op_code = BINARY_MUL;
-    eltwise_if_code = 0;
   } else if (op_type == "elementwise_add") {
     op_code = BINARY_ADD;
-    eltwise_if_code = 1;
   } else if (op_type == "elementwise_sub") {
     op_code = BINARY_SUB;
-    eltwise_if_code = 1;
-    coeff[1] = -1.f;
+  } else if (op_type == "elementwise_div") {
+    op_code = BINARY_DIV;
   } else {
     LOG(FATAL) << "UNSUPPORTED ELTWISE OPERATION: " << op_type;
   }
@@ -115,31 +111,21 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       shape[1] = &i_expand_shape_data[0];
       y_data = nullptr;
     }
-    add_binary_layer_v2(graph->GetCompilerHandle(),
-                        name[0],
-                        shape[0],
-                        dim[0],
-                        0,
-                        static_cast<const float*>(x_data),
-                        name[1],
-                        shape[1],
-                        dim[1],
-                        0,
-                        static_cast<const float*>(y_data),
-                        static_cast<const char*>(output_var_name.c_str()),
-                        op_code);
-  } else {
-    add_eltwise_layer(graph->GetCompilerHandle(),
-                      input_num,
-                      shape,
-                      dim,
-                      name,
-                      const_cast<const int*>(&i_output_shape_data[0]),
-                      output_dims.size(),
-                      static_cast<const char*>(output_var_name.c_str()),
-                      eltwise_if_code,
-                      coeff);
   }
+  add_binary_layer_v2(graph->GetCompilerHandle(),
+                      name[0],
+                      shape[0],
+                      dim[0],
+                      0,
+                      static_cast<const float*>(x_data),
+                      name[1],
+                      shape[1],
+                      dim[1],
+                      0,
+                      static_cast<const float*>(y_data),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      op_code);
+
   delete[] shape;
   delete[] name;
   delete[] dim;
@@ -161,3 +147,6 @@ REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
 REGISTER_SUBGRAPH_BRIDGE(elementwise_sub,
                          kBM,
                          paddle::lite::subgraph::bm::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(elementwise_div,
+                         kBM,
+                         paddle::lite::subgraph::bm::ElementwiseConverter);
diff --git a/lite/kernels/bm/bridges/interpolate_op.cc b/lite/kernels/bm/bridges/interpolate_op.cc
new file mode 100644
index 0000000000..384b8e0daa
--- /dev/null
+++ b/lite/kernels/bm/bridges/interpolate_op.cc
@@ -0,0 +1,95 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <bmcompiler_if.h>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
+  }
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int32_t>(output_dims[i]);
+  }
+  auto scale = op_info->GetAttr<float>("scale");
+  int32_t i_scale = static_cast<int32_t>(scale);
+  bool is_int = false;
+  if ((scale - i_scale) < 0.000001f) {
+    is_int = true;
+  }
+  int32_t type = 0;
+  if (op_type == "nearest_interp") {
+    type = 2;
+  } else {
+    type = 0;
+  }
+
+  if (type == 2 && is_int) {
+    add_upsample_layer(graph->GetCompilerHandle(),
+                       const_cast<const int*>(&i_x_shape_data[0]),
+                       x_dims.size(),
+                       static_cast<const char*>(x_var_name.c_str()),
+                       const_cast<const int*>(&i_output_shape_data[0]),
+                       output_dims.size(),
+                       static_cast<const char*>(output_var_name.c_str()),
+                       i_scale);
+  } else {
+    add_interp_layer_v2(graph->GetCompilerHandle(),
+                        const_cast<const int*>(&i_x_shape_data[0]),
+                        x_dims.size(),
+                        static_cast<const char*>(x_var_name.c_str()),
+                        1,
+                        const_cast<const int*>(&i_output_shape_data[0]),
+                        output_dims.size(),
+                        nullptr,
+                        static_cast<const char*>(output_var_name.c_str()),
+                        0,
+                        0,
+                        type);
+  }
+  graph->AddNode(output_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(nearest_interp,
+                         kBM,
+                         paddle::lite::subgraph::bm::InterpolateConverter);
+REGISTER_SUBGRAPH_BRIDGE(bilinear_interp,
+                         kBM,
+                         paddle::lite::subgraph::bm::InterpolateConverter);
diff --git a/lite/kernels/bm/bridges/paddle_use_bridges.h b/lite/kernels/bm/bridges/paddle_use_bridges.h
index 72820e965f..74303d2dd7 100644
--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -21,6 +21,7 @@ USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_mul, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_sub, kBM);
+USE_SUBGRAPH_BRIDGE(elementwise_div, kBM);
 USE_SUBGRAPH_BRIDGE(pool2d, kBM);
 USE_SUBGRAPH_BRIDGE(softmax, kBM);
 USE_SUBGRAPH_BRIDGE(mul, kBM);
@@ -38,3 +39,8 @@ USE_SUBGRAPH_BRIDGE(norm, kBM);
 USE_SUBGRAPH_BRIDGE(prior_box, kBM);
 USE_SUBGRAPH_BRIDGE(box_coder, kBM);
 USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM);
+USE_SUBGRAPH_BRIDGE(nearest_interp, kBM);
+USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM);
+USE_SUBGRAPH_BRIDGE(yolo_box, kBM);
+USE_SUBGRAPH_BRIDGE(sqrt, kBM);
+USE_SUBGRAPH_BRIDGE(square, kBM);
diff --git a/lite/kernels/bm/bridges/yolo_box_op.cc b/lite/kernels/bm/bridges/yolo_box_op.cc
new file mode 100644
index 0000000000..3d23f87a31
--- /dev/null
+++ b/lite/kernels/bm/bridges/yolo_box_op.cc
@@ -0,0 +1,125 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <bmcompiler_defs.h>
+#include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
+#include <user_bmcpu_common.h>
+#include <vector>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  auto img_size_var_name = op_info->Input("ImgSize").front();
+  auto img_size = scope->FindVar(img_size_var_name)->GetMutable<lite::Tensor>();
+  auto img_size_dims = img_size->dims();
+  auto boxes_var_name = op_info->Output("Boxes").front();
+  auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
+  auto boxes_dims = boxes->dims();
+  auto scores_var_name = op_info->Output("Scores").front();
+  auto scores = scope->FindVar(scores_var_name)->GetMutable<lite::Tensor>();
+  auto scores_dims = scores->dims();
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
+  }
+  std::vector<int32_t> i_img_size_shape_data(img_size_dims.size());
+  for (size_t i = 0; i < img_size_dims.size(); i++) {
+    i_img_size_shape_data[i] = static_cast<int32_t>(img_size_dims[i]);
+  }
+  std::vector<int32_t> i_boxes_shape_data(boxes_dims.size());
+  for (size_t i = 0; i < boxes_dims.size(); i++) {
+    i_boxes_shape_data[i] = static_cast<int32_t>(boxes_dims[i]);
+  }
+  std::vector<int32_t> i_scores_shape_data(scores_dims.size());
+  for (size_t i = 0; i < scores_dims.size(); i++) {
+    i_scores_shape_data[i] = static_cast<int32_t>(scores_dims[i]);
+  }
+
+  auto class_num = op_info->GetAttr<int>("class_num");
+  auto downsample_ratio = op_info->GetAttr<int>("downsample_ratio");
+  auto conf_thresh = op_info->GetAttr<float>("conf_thresh");
+  auto anchors = op_info->GetAttr<std::vector<int>>("anchors");
+  int* anchors_buffer = static_cast<int*>(malloc(sizeof(int) * anchors.size()));
+  CHECK(anchors_buffer != nullptr);
+  memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
+  user_cpu_param_t bm_param;
+  bm_param.op_type = USER_PADDLE_YOLO_BOX;
+  bm_param.u.yolo_box_param.class_num = class_num;
+  bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio;
+  bm_param.u.yolo_box_param.conf_thresh = conf_thresh;
+  bm_param.u.yolo_box_param.anchors = anchors_buffer;
+  bm_param.u.yolo_box_param.anchors_size = anchors.size();
+  memcpy(anchors_buffer, &anchors[0], sizeof(int) * anchors.size());
+  int32_t input_num = 2;
+  int32_t output_num = 2;
+  int32_t* in_shape[2];
+  int32_t in_dim[2];
+  const char* in_name[2];
+  in_shape[0] = &i_x_shape_data[0];
+  in_shape[1] = &i_img_size_shape_data[0];
+  in_dim[0] = x_dims.size();
+  in_dim[1] = img_size_dims.size();
+  in_name[0] = static_cast<const char*>(x_var_name.c_str());
+  in_name[1] = static_cast<const char*>(img_size_var_name.c_str());
+  int32_t* out_shape[2];
+  int32_t out_dim[2];
+  const char* out_name[2];
+  out_shape[0] = &i_boxes_shape_data[0];
+  out_shape[1] = &i_scores_shape_data[0];
+  out_dim[0] = boxes_dims.size();
+  out_dim[1] = scores_dims.size();
+  out_name[0] = static_cast<const char*>(boxes_var_name.c_str());
+  out_name[1] = static_cast<const char*>(scores_var_name.c_str());
+
+  add_user_cpu_layer(graph->GetCompilerHandle(),
+                     input_num,
+                     in_shape,
+                     in_dim,
+                     in_name,
+                     output_num,
+                     out_shape,
+                     out_dim,
+                     out_name,
+                     &bm_param,
+                     static_cast<int>(sizeof(bm_param)));
+  graph->AddNode(boxes_var_name);
+  graph->AddNode(scores_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(yolo_box,
+                         kBM,
+                         paddle::lite::subgraph::bm::YoloBoxConverter);
diff --git a/lite/kernels/bm/subgraph_compute.cc b/lite/kernels/bm/subgraph_compute.cc
index 2e47102d76..6ca0c0c9ed 100644
--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -34,6 +34,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   const auto& bridges = subgraph::Registry::Instance();
   graph.CreateCompilerHandle();
   auto& ctx = this->ctx_->template As<BMContext>();
+  int kk = 0;
   for (auto& inst : origin_program_) {
     auto op = inst.op();
     CHECK(op);
@@ -52,7 +53,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       return subgraph::FAILED;
     }
   }
-  std::string net_name = "paddle_bitmain";
+  std::string net_name = "bmnetc_f32umodel";
   __bmcompile_opt(
       graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 1);
   void* bmodel_data = nullptr;
@@ -71,7 +72,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   origin_itensors_.resize(input_names_.size());
   device_inputs_.resize(input_names_.size());
   for (size_t i = 0; i < input_names_.size(); i++) {
-    origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
+    origin_itensors_[i] = scope_->FindMutableTensor(net_info_->input_names[i]);
     CHECK(origin_itensors_[i]);
     origin_idims_[i] = origin_itensors_[i]->dims();
     bm_device_mem_t* p_mem =
@@ -90,19 +91,15 @@ int SubgraphEngine::BuildDeviceProgram() {
   origin_otensors_.resize(output_names_.size());
   device_outputs_.resize(output_names_.size());
   for (size_t i = 0; i < output_names_.size(); i++) {
-    origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
+    origin_otensors_[i] = scope_->FindMutableTensor(net_info_->output_names[i]);
     CHECK(origin_otensors_[i]);
     origin_odims_[i] = origin_otensors_[i]->dims();
-    output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
     origin_otensors_[i]->mutable_data<float>();
-  }
-  for (size_t i = 0; i < output_names_.size(); i++) {
-    int mapping_index = output_map_.at(net_info_->output_names[i]);
     bm_device_mem_t* p_mem =
         static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
     CHECK(p_mem != nullptr);
     CHECK_EQ(bm_malloc_device_byte(
-                 bm_hd_, p_mem, origin_otensors_[mapping_index]->memory_size()),
+                 bm_hd_, p_mem, origin_otensors_[i]->memory_size()),
             BM_SUCCESS);
     bmrt_tensor_with_device(&device_outputs_[i],
                             *p_mem,
diff --git a/lite/kernels/bm/subgraph_compute.h b/lite/kernels/bm/subgraph_compute.h
index 0e4b1dfa32..ed90dc74c4 100644
--- a/lite/kernels/bm/subgraph_compute.h
+++ b/lite/kernels/bm/subgraph_compute.h
@@ -51,7 +51,6 @@ class SubgraphEngine : public subgraph::Engine {
   void *bmrt_hd_;
   std::vector<bm_tensor_t> device_inputs_;
   std::vector<bm_tensor_t> device_outputs_;
-  std::map<std::string, int> output_map_;
   const char **net_names_;
   const bm_net_info_t *net_info_;
   bm_handle_t bm_hd_;
-- 
GitLab