* [LITE][BM] support yolov3, test=develop (#3035)

9d88feea · Santa An · GitHub · 63e0f695 · 9d88feea · 9d88feea
8 changed files
--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -23,6 +23,8 @@ lite_cc_library(subgraph_bridge_norm_op_bm SRCS norm_op.cc DEPS ${bm_subgraph_br
 lite_cc_library(subgraph_bridge_prior_box_op_bm SRCS prior_box_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_box_coder_op_bm SRCS box_coder_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_multiclass_nms_op_bm SRCS multiclass_nms_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_interpolate_op_bm SRCS interpolate_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_yolo_box_op_bm SRCS yolo_box_op.cc DEPS ${bm_subgraph_bridge_deps})

 set(bm_subgraph_bridges
        subgraph_bridge_registry
@@ -44,4 +46,6 @@ set(bm_subgraph_bridges
        subgraph_bridge_prior_box_op_bm
        subgraph_bridge_box_coder_op_bm
        subgraph_bridge_multiclass_nms_op_bm
+        subgraph_bridge_interpolate_op_bm
+        subgraph_bridge_yolo_box_op_bm
        CACHE INTERNAL "bm_subgraph_bridges")
--- a/lite/kernels/bm/bridges/act_op.cc
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <bmcompiler_if.h>
+#include <bmcompiler_op_code.h>
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/npu/bridges/registry.h"

@@ -46,22 +47,38 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  float alpha = 0.f;
+  int active_type_id = 0;
  if (op_type == "relu") {
  } else if (op_type == "leaky_relu") {
    alpha = op_info->GetAttr<float>("alpha");
+  } else if (op_type == "sqrt") {
+    active_type_id = ACTIVE_SQRT;
+  } else if (op_type == "square") {
+    active_type_id = ACTIVE_SQUARE;
  } else {
    LOG(FATAL) << "[BM] unsupport act type";
    return FAILED;
  }
-  add_relu_layer(graph->GetCompilerHandle(),
-                 const_cast<const int*>(&i_x_shape_data[0]),
-                 x_dims.size(),
-                 static_cast<const char*>(x_var_name.c_str()),
-                 const_cast<const int*>(&i_output_shape_data[0]),
-                 output_dims.size(),
-                 static_cast<const char*>(output_var_name.c_str()),
-                 alpha,
-                 -1.f);
+  if (op_type == "relu" || op_type == "leaky_relu") {
+    add_relu_layer(graph->GetCompilerHandle(),
+                   const_cast<const int*>(&i_x_shape_data[0]),
+                   x_dims.size(),
+                   static_cast<const char*>(x_var_name.c_str()),
+                   const_cast<const int*>(&i_output_shape_data[0]),
+                   output_dims.size(),
+                   static_cast<const char*>(output_var_name.c_str()),
+                   alpha,
+                   -1.f);
+  } else {
+    add_active_layer(graph->GetCompilerHandle(),
+                     const_cast<const int*>(&i_x_shape_data[0]),
+                     x_dims.size(),
+                     static_cast<const char*>(x_var_name.c_str()),
+                     const_cast<const int*>(&i_output_shape_data[0]),
+                     output_dims.size(),
+                     static_cast<const char*>(output_var_name.c_str()),
+                     active_type_id);
+  }
  graph->AddNode(output_var_name);
  return SUCCESS;
 }
@@ -75,3 +92,5 @@ REGISTER_SUBGRAPH_BRIDGE(relu, kBM, paddle::lite::subgraph::bm::ActConverter);
 REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
                         kBM,
                         paddle::lite::subgraph::bm::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(sqrt, kBM, paddle::lite::subgraph::bm::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(square, kBM, paddle::lite::subgraph::bm::ActConverter);
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -71,18 +71,14 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  }
  auto axis = op_info->GetAttr<int>("axis");
  int op_code{-1};
-  int eltwise_if_code{-1};
-  float coeff[2] = {1.f, 1.f};
  if (op_type == "elementwise_mul") {
    op_code = BINARY_MUL;
-    eltwise_if_code = 0;
  } else if (op_type == "elementwise_add") {
    op_code = BINARY_ADD;
-    eltwise_if_code = 1;
  } else if (op_type == "elementwise_sub") {
    op_code = BINARY_SUB;
-    eltwise_if_code = 1;
-    coeff[1] = -1.f;
+  } else if (op_type == "elementwise_div") {
+    op_code = BINARY_DIV;
  } else {
    LOG(FATAL) << "UNSUPPORTED ELTWISE OPERATION: " << op_type;
  }
@@ -115,31 +111,21 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
      shape[1] = &i_expand_shape_data[0];
      y_data = nullptr;
    }
-    add_binary_layer_v2(graph->GetCompilerHandle(),
-                        name[0],
-                        shape[0],
-                        dim[0],
-                        0,
-                        static_cast<const float*>(x_data),
-                        name[1],
-                        shape[1],
-                        dim[1],
-                        0,
-                        static_cast<const float*>(y_data),
-                        static_cast<const char*>(output_var_name.c_str()),
-                        op_code);
-  } else {
-    add_eltwise_layer(graph->GetCompilerHandle(),
-                      input_num,
-                      shape,
-                      dim,
-                      name,
-                      const_cast<const int*>(&i_output_shape_data[0]),
-                      output_dims.size(),
-                      static_cast<const char*>(output_var_name.c_str()),
-                      eltwise_if_code,
-                      coeff);
  }
+  add_binary_layer_v2(graph->GetCompilerHandle(),
+                      name[0],
+                      shape[0],
+                      dim[0],
+                      0,
+                      static_cast<const float*>(x_data),
+                      name[1],
+                      shape[1],
+                      dim[1],
+                      0,
+                      static_cast<const float*>(y_data),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      op_code);
+
  delete[] shape;
  delete[] name;
  delete[] dim;
@@ -161,3 +147,6 @@ REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
 REGISTER_SUBGRAPH_BRIDGE(elementwise_sub,
                         kBM,
                         paddle::lite::subgraph::bm::ElementwiseConverter);
+REGISTER_SUBGRAPH_BRIDGE(elementwise_div,
+                         kBM,
+                         paddle::lite::subgraph::bm::ElementwiseConverter);
--- a/lite/kernels/bm/bridges/interpolate_op.cc
+++ b/lite/kernels/bm/bridges/interpolate_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <bmcompiler_if.h>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+// Converts a `nearest_interp` / `bilinear_interp` op into a BM compiler layer
+// on the graph passed through `ctx`.
+//
+//   ctx    - opaque pointer that is cast to the BM bridge Graph; must not be
+//            null.
+//   op     - the op being converted; its scope supplies the "X" input tensor
+//            and the "Out" output tensor. Must not be null.
+//   kernel - not used by this converter.
+//
+// Returns SUCCESS once the layer has been added and the output variable name
+// has been recorded on the graph.
+int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+
+  // The compiler calls below take shapes as plain int arrays, so the input
+  // dims are copied into a vector of int32_t.
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
+  }
+
+  // Same conversion for the output dims.
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int32_t>(output_dims[i]);
+  }
+
+  auto scale = op_info->GetAttr<float>("scale");
+  const int32_t i_scale = static_cast<int32_t>(scale);
+  // The upsample layer is driven by an integer factor, so it is only usable
+  // when `scale` is a positive whole number. Without the `i_scale > 0` guard a
+  // scale of 0 (or any value truncating to 0 within the tolerance) would be
+  // classified as integral and handed to the upsample layer as factor 0.
+  // NOTE(review): presumably scale <= 0 means the output size was given
+  // explicitly instead of by factor - confirm against the op definition.
+  const bool is_int =
+      i_scale > 0 && (scale - static_cast<float>(i_scale)) < 0.000001f;
+
+  // Interpolation mode passed to the compiler: 2 for nearest_interp, 0 for
+  // every other op type routed here.
+  const int32_t type = (op_type == "nearest_interp") ? 2 : 0;
+
+  if (type == 2 && is_int) {
+    add_upsample_layer(graph->GetCompilerHandle(),
+                       const_cast<const int*>(&i_x_shape_data[0]),
+                       x_dims.size(),
+                       static_cast<const char*>(x_var_name.c_str()),
+                       const_cast<const int*>(&i_output_shape_data[0]),
+                       output_dims.size(),
+                       static_cast<const char*>(output_var_name.c_str()),
+                       i_scale);
+  } else {
+    // General path: the target size comes from the output shape.
+    // NOTE(review): the literal arguments (1, nullptr, 0, 0) are defined by
+    // the add_interp_layer_v2 declaration in bmcompiler_if.h - confirm there.
+    add_interp_layer_v2(graph->GetCompilerHandle(),
+                        const_cast<const int*>(&i_x_shape_data[0]),
+                        x_dims.size(),
+                        static_cast<const char*>(x_var_name.c_str()),
+                        1,
+                        const_cast<const int*>(&i_output_shape_data[0]),
+                        output_dims.size(),
+                        nullptr,
+                        static_cast<const char*>(output_var_name.c_str()),
+                        0,
+                        0,
+                        type);
+  }
+  graph->AddNode(output_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(nearest_interp,  // kBM bridge for nearest_interp
+                         kBM,
+                         paddle::lite::subgraph::bm::InterpolateConverter);
+REGISTER_SUBGRAPH_BRIDGE(bilinear_interp,  // same converter handles bilinear
+                         kBM,
+                         paddle::lite::subgraph::bm::InterpolateConverter);
--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -21,6 +21,7 @@ USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_mul, kBM);
 USE_SUBGRAPH_BRIDGE(elementwise_sub, kBM);
+USE_SUBGRAPH_BRIDGE(elementwise_div, kBM);
 USE_SUBGRAPH_BRIDGE(pool2d, kBM);
 USE_SUBGRAPH_BRIDGE(softmax, kBM);
 USE_SUBGRAPH_BRIDGE(mul, kBM);
@@ -38,3 +39,8 @@ USE_SUBGRAPH_BRIDGE(norm, kBM);
 USE_SUBGRAPH_BRIDGE(prior_box, kBM);
 USE_SUBGRAPH_BRIDGE(box_coder, kBM);
 USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM);
+USE_SUBGRAPH_BRIDGE(nearest_interp, kBM);
+USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM);
+USE_SUBGRAPH_BRIDGE(yolo_box, kBM);
+USE_SUBGRAPH_BRIDGE(sqrt, kBM);
+USE_SUBGRAPH_BRIDGE(square, kBM);
--- a/lite/kernels/bm/bridges/yolo_box_op.cc
+++ b/lite/kernels/bm/bridges/yolo_box_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <bmcompiler_if.h>
+#include <user_bmcpu_common.h>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+// Converts a `yolo_box` op into a BM user CPU layer on the graph passed
+// through `ctx`.
+//
+//   ctx    - opaque pointer that is cast to the BM bridge Graph; must not be
+//            null.
+//   op     - the op being converted; its scope supplies the "X" and "ImgSize"
+//            inputs and the "Boxes" and "Scores" outputs. Must not be null.
+//   kernel - not used by this converter.
+//
+// Returns SUCCESS once the layer has been added and both output variable
+// names have been recorded on the graph.
+int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+
+  // Look up the two inputs and two outputs and read their dims.
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  auto img_size_var_name = op_info->Input("ImgSize").front();
+  auto img_size = scope->FindVar(img_size_var_name)->GetMutable<lite::Tensor>();
+  auto img_size_dims = img_size->dims();
+  auto boxes_var_name = op_info->Output("Boxes").front();
+  auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
+  auto boxes_dims = boxes->dims();
+  auto scores_var_name = op_info->Output("Scores").front();
+  auto scores = scope->FindVar(scores_var_name)->GetMutable<lite::Tensor>();
+  auto scores_dims = scores->dims();
+
+  // The compiler call takes shapes as int32_t arrays, so every dims object is
+  // copied element by element into a vector of int32_t.
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
+  }
+  std::vector<int32_t> i_img_size_shape_data(img_size_dims.size());
+  for (size_t i = 0; i < img_size_dims.size(); i++) {
+    i_img_size_shape_data[i] = static_cast<int32_t>(img_size_dims[i]);
+  }
+  std::vector<int32_t> i_boxes_shape_data(boxes_dims.size());
+  for (size_t i = 0; i < boxes_dims.size(); i++) {
+    i_boxes_shape_data[i] = static_cast<int32_t>(boxes_dims[i]);
+  }
+  std::vector<int32_t> i_scores_shape_data(scores_dims.size());
+  for (size_t i = 0; i < scores_dims.size(); i++) {
+    i_scores_shape_data[i] = static_cast<int32_t>(scores_dims[i]);
+  }
+
+  auto class_num = op_info->GetAttr<int>("class_num");
+  auto downsample_ratio = op_info->GetAttr<int>("downsample_ratio");
+  auto conf_thresh = op_info->GetAttr<float>("conf_thresh");
+  auto anchors = op_info->GetAttr<std::vector<int>>("anchors");
+
+  // The anchors are copied once into a heap buffer whose raw pointer is
+  // stored in the layer parameters. data() is used instead of &anchors[0] so
+  // that an empty attribute does not index an empty vector.
+  // NOTE(review): anchors_buffer is never freed in this function. bm_param
+  // only holds the pointer, so whether it may be released after
+  // add_user_cpu_layer depends on whether the compiler copies the anchors -
+  // confirm against the BM SDK before adding a free().
+  const size_t anchors_bytes = sizeof(int) * anchors.size();
+  int* anchors_buffer = static_cast<int*>(malloc(anchors_bytes));
+  CHECK(anchors_buffer != nullptr);
+  memcpy(anchors_buffer, anchors.data(), anchors_bytes);
+
+  user_cpu_param_t bm_param;
+  bm_param.op_type = USER_PADDLE_YOLO_BOX;
+  bm_param.u.yolo_box_param.class_num = class_num;
+  bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio;
+  bm_param.u.yolo_box_param.conf_thresh = conf_thresh;
+  bm_param.u.yolo_box_param.anchors = anchors_buffer;
+  bm_param.u.yolo_box_param.anchors_size = anchors.size();
+
+  // Inputs: index 0 is X, index 1 is ImgSize.
+  int32_t input_num = 2;
+  int32_t* in_shape[2];
+  int32_t in_dim[2];
+  const char* in_name[2];
+  in_shape[0] = &i_x_shape_data[0];
+  in_shape[1] = &i_img_size_shape_data[0];
+  in_dim[0] = x_dims.size();
+  in_dim[1] = img_size_dims.size();
+  in_name[0] = static_cast<const char*>(x_var_name.c_str());
+  in_name[1] = static_cast<const char*>(img_size_var_name.c_str());
+
+  // Outputs: index 0 is Boxes, index 1 is Scores.
+  int32_t output_num = 2;
+  int32_t* out_shape[2];
+  int32_t out_dim[2];
+  const char* out_name[2];
+  out_shape[0] = &i_boxes_shape_data[0];
+  out_shape[1] = &i_scores_shape_data[0];
+  out_dim[0] = boxes_dims.size();
+  out_dim[1] = scores_dims.size();
+  out_name[0] = static_cast<const char*>(boxes_var_name.c_str());
+  out_name[1] = static_cast<const char*>(scores_var_name.c_str());
+
+  add_user_cpu_layer(graph->GetCompilerHandle(),
+                     input_num,
+                     in_shape,
+                     in_dim,
+                     in_name,
+                     output_num,
+                     out_shape,
+                     out_dim,
+                     out_name,
+                     &bm_param,
+                     static_cast<int>(sizeof(bm_param)));
+  graph->AddNode(boxes_var_name);
+  graph->AddNode(scores_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(yolo_box,  // kBM bridge for the yolo_box op
+                         kBM,
+                         paddle::lite::subgraph::bm::YoloBoxConverter);
--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -34,6 +34,7 @@ int SubgraphEngine::BuildDeviceProgram() {
  const auto& bridges = subgraph::Registry::Instance();
  graph.CreateCompilerHandle();
  auto& ctx = this->ctx_->template As<BMContext>();
+  int kk = 0;
  for (auto& inst : origin_program_) {
    auto op = inst.op();
    CHECK(op);
@@ -52,7 +53,7 @@ int SubgraphEngine::BuildDeviceProgram() {
      return subgraph::FAILED;
    }
  }
-  std::string net_name = "paddle_bitmain";
+  std::string net_name = "bmnetc_f32umodel";
  __bmcompile_opt(
      graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 1);
  void* bmodel_data = nullptr;
@@ -71,7 +72,7 @@ int SubgraphEngine::BuildDeviceProgram() {
  origin_itensors_.resize(input_names_.size());
  device_inputs_.resize(input_names_.size());
  for (size_t i = 0; i < input_names_.size(); i++) {
-    origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
+    origin_itensors_[i] = scope_->FindMutableTensor(net_info_->input_names[i]);
    CHECK(origin_itensors_[i]);
    origin_idims_[i] = origin_itensors_[i]->dims();
    bm_device_mem_t* p_mem =
@@ -90,19 +91,15 @@ int SubgraphEngine::BuildDeviceProgram() {
  origin_otensors_.resize(output_names_.size());
  device_outputs_.resize(output_names_.size());
  for (size_t i = 0; i < output_names_.size(); i++) {
-    origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
+    origin_otensors_[i] = scope_->FindMutableTensor(net_info_->output_names[i]);
    CHECK(origin_otensors_[i]);
    origin_odims_[i] = origin_otensors_[i]->dims();
-    output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
    origin_otensors_[i]->mutable_data<float>();
-  }
-  for (size_t i = 0; i < output_names_.size(); i++) {
-    int mapping_index = output_map_.at(net_info_->output_names[i]);
    bm_device_mem_t* p_mem =
        static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
    CHECK(p_mem != nullptr);
    CHECK_EQ(bm_malloc_device_byte(
-                 bm_hd_, p_mem, origin_otensors_[mapping_index]->memory_size()),
+                 bm_hd_, p_mem, origin_otensors_[i]->memory_size()),
             BM_SUCCESS);
    bmrt_tensor_with_device(&device_outputs_[i],
                            *p_mem,

--- a/lite/kernels/bm/subgraph_compute.h
+++ b/lite/kernels/bm/subgraph_compute.h
@@ -51,7 +51,6 @@ class SubgraphEngine : public subgraph::Engine {
  void *bmrt_hd_;
  std::vector<bm_tensor_t> device_inputs_;
  std::vector<bm_tensor_t> device_outputs_;
-  std::map<std::string, int> output_map_;
  const char **net_names_;
  const bm_net_info_t *net_info_;
  bm_handle_t bm_hd_;