提交 5046baba 编写于 作者: S Santa An 提交者: GitHub

* [LITE][BM] support yolov3, test=develop (#3035)

上级 849563b6
......@@ -23,6 +23,8 @@ lite_cc_library(subgraph_bridge_norm_op_bm SRCS norm_op.cc DEPS ${bm_subgraph_br
lite_cc_library(subgraph_bridge_prior_box_op_bm SRCS prior_box_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_box_coder_op_bm SRCS box_coder_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_multiclass_nms_op_bm SRCS multiclass_nms_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_interpolate_op_bm SRCS interpolate_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_yolo_box_op_bm SRCS yolo_box_op.cc DEPS ${bm_subgraph_bridge_deps})
set(bm_subgraph_bridges
subgraph_bridge_registry
......@@ -44,4 +46,6 @@ set(bm_subgraph_bridges
subgraph_bridge_prior_box_op_bm
subgraph_bridge_box_coder_op_bm
subgraph_bridge_multiclass_nms_op_bm
subgraph_bridge_interpolate_op_bm
subgraph_bridge_yolo_box_op_bm
CACHE INTERNAL "bm_subgraph_bridges")
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
......@@ -46,22 +47,38 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
}
float alpha = 0.f;
int active_type_id = 0;
if (op_type == "relu") {
} else if (op_type == "leaky_relu") {
alpha = op_info->GetAttr<float>("alpha");
} else if (op_type == "sqrt") {
active_type_id = ACTIVE_SQRT;
} else if (op_type == "square") {
active_type_id = ACTIVE_SQUARE;
} else {
LOG(FATAL) << "[BM] unsupport act type";
return FAILED;
}
add_relu_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
static_cast<const char*>(x_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
output_dims.size(),
static_cast<const char*>(output_var_name.c_str()),
alpha,
-1.f);
if (op_type == "relu" || op_type == "leaky_relu") {
add_relu_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
static_cast<const char*>(x_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
output_dims.size(),
static_cast<const char*>(output_var_name.c_str()),
alpha,
-1.f);
} else {
add_active_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
static_cast<const char*>(x_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
output_dims.size(),
static_cast<const char*>(output_var_name.c_str()),
active_type_id);
}
graph->AddNode(output_var_name);
return SUCCESS;
}
......@@ -75,3 +92,5 @@ REGISTER_SUBGRAPH_BRIDGE(relu, kBM, paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
kBM,
paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(sqrt, kBM, paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(square, kBM, paddle::lite::subgraph::bm::ActConverter);
......@@ -71,18 +71,14 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
auto axis = op_info->GetAttr<int>("axis");
int op_code{-1};
int eltwise_if_code{-1};
float coeff[2] = {1.f, 1.f};
if (op_type == "elementwise_mul") {
op_code = BINARY_MUL;
eltwise_if_code = 0;
} else if (op_type == "elementwise_add") {
op_code = BINARY_ADD;
eltwise_if_code = 1;
} else if (op_type == "elementwise_sub") {
op_code = BINARY_SUB;
eltwise_if_code = 1;
coeff[1] = -1.f;
} else if (op_type == "elementwise_div") {
op_code = BINARY_DIV;
} else {
LOG(FATAL) << "UNSUPPORTED ELTWISE OPERATION: " << op_type;
}
......@@ -115,31 +111,21 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
shape[1] = &i_expand_shape_data[0];
y_data = nullptr;
}
add_binary_layer_v2(graph->GetCompilerHandle(),
name[0],
shape[0],
dim[0],
0,
static_cast<const float*>(x_data),
name[1],
shape[1],
dim[1],
0,
static_cast<const float*>(y_data),
static_cast<const char*>(output_var_name.c_str()),
op_code);
} else {
add_eltwise_layer(graph->GetCompilerHandle(),
input_num,
shape,
dim,
name,
const_cast<const int*>(&i_output_shape_data[0]),
output_dims.size(),
static_cast<const char*>(output_var_name.c_str()),
eltwise_if_code,
coeff);
}
add_binary_layer_v2(graph->GetCompilerHandle(),
name[0],
shape[0],
dim[0],
0,
static_cast<const float*>(x_data),
name[1],
shape[1],
dim[1],
0,
static_cast<const float*>(y_data),
static_cast<const char*>(output_var_name.c_str()),
op_code);
delete[] shape;
delete[] name;
delete[] dim;
......@@ -161,3 +147,6 @@ REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
REGISTER_SUBGRAPH_BRIDGE(elementwise_sub,
kBM,
paddle::lite::subgraph::bm::ElementwiseConverter);
REGISTER_SUBGRAPH_BRIDGE(elementwise_div,
kBM,
paddle::lite::subgraph::bm::ElementwiseConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts an interpolation op into a BM compiler layer and registers the
// op's output with the subgraph.
//
// Params:
//   ctx    - opaque pointer to the bm Graph under construction; must be
//            non-null.
//   op     - the op being converted; must be non-null.
//   kernel - not used by this converter.
//
// Returns SUCCESS once the layer has been added and the output node recorded.
int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();

  // Input "X": fetch the tensor and narrow its dims to int32, which is the
  // element type the BM compiler calls below take for shapes.
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
  }

  // Output "Out": same treatment as the input.
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int32_t>(output_dims[i]);
  }

  auto scale = op_info->GetAttr<float>("scale");
  const int32_t i_scale = static_cast<int32_t>(scale);
  // The upsample layer receives an integer factor, so it may only be chosen
  // when `scale` is a whole number >= 1. The cast truncates toward zero, so
  // for a zero or negative scale the remainder below is <= 0 and would pass
  // the epsilon test on its own; the explicit i_scale >= 1 guard keeps those
  // values on the generic interp path, which works from the output shape.
  const bool is_int =
      i_scale >= 1 && (scale - static_cast<float>(i_scale)) < 0.000001f;

  // Mode id forwarded to add_interp_layer_v2: 2 for nearest_interp, 0 for
  // every other op type routed to this converter.
  // NOTE(review): the meaning of these ids comes from the BM compiler
  // headers, which are not visible here - confirm against bmcompiler_if.h.
  const int32_t type = (op_type == "nearest_interp") ? 2 : 0;

  if (type == 2 && is_int) {
    add_upsample_layer(graph->GetCompilerHandle(),
                       const_cast<const int*>(&i_x_shape_data[0]),
                       x_dims.size(),
                       static_cast<const char*>(x_var_name.c_str()),
                       const_cast<const int*>(&i_output_shape_data[0]),
                       output_dims.size(),
                       static_cast<const char*>(output_var_name.c_str()),
                       i_scale);
  } else {
    // NOTE(review): the literal arguments (1, nullptr, 0, 0) are passed
    // through unchanged from the original code; their semantics are defined
    // by the BM compiler API and should be checked there.
    add_interp_layer_v2(graph->GetCompilerHandle(),
                        const_cast<const int*>(&i_x_shape_data[0]),
                        x_dims.size(),
                        static_cast<const char*>(x_var_name.c_str()),
                        1,
                        const_cast<const int*>(&i_output_shape_data[0]),
                        output_dims.size(),
                        nullptr,
                        static_cast<const char*>(output_var_name.c_str()),
                        0,
                        0,
                        type);
  }
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Both interpolation op types are bound to the same converter for the kBM
// target; InterpolateConverter tells them apart by the op type string.
REGISTER_SUBGRAPH_BRIDGE(nearest_interp,
kBM,
paddle::lite::subgraph::bm::InterpolateConverter);
REGISTER_SUBGRAPH_BRIDGE(bilinear_interp,
kBM,
paddle::lite::subgraph::bm::InterpolateConverter);
......@@ -21,6 +21,7 @@ USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kBM);
USE_SUBGRAPH_BRIDGE(elementwise_add, kBM);
USE_SUBGRAPH_BRIDGE(elementwise_mul, kBM);
USE_SUBGRAPH_BRIDGE(elementwise_sub, kBM);
USE_SUBGRAPH_BRIDGE(elementwise_div, kBM);
USE_SUBGRAPH_BRIDGE(pool2d, kBM);
USE_SUBGRAPH_BRIDGE(softmax, kBM);
USE_SUBGRAPH_BRIDGE(mul, kBM);
......@@ -38,3 +39,8 @@ USE_SUBGRAPH_BRIDGE(norm, kBM);
USE_SUBGRAPH_BRIDGE(prior_box, kBM);
USE_SUBGRAPH_BRIDGE(box_coder, kBM);
USE_SUBGRAPH_BRIDGE(multiclass_nms, kBM);
USE_SUBGRAPH_BRIDGE(nearest_interp, kBM);
USE_SUBGRAPH_BRIDGE(bilinear_interp, kBM);
USE_SUBGRAPH_BRIDGE(yolo_box, kBM);
USE_SUBGRAPH_BRIDGE(sqrt, kBM);
USE_SUBGRAPH_BRIDGE(square, kBM);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <user_bmcpu_common.h>
#include <iostream>
#include <string>
#include <vector>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Copies a dims object into a vector of int32 values, the element type used
// for shapes in the add_user_cpu_layer call below.
template <typename Dims>
static std::vector<int32_t> YoloBoxShapeToInt32(const Dims& dims) {
  std::vector<int32_t> shape(dims.size());
  for (size_t i = 0; i < dims.size(); i++) {
    shape[i] = static_cast<int32_t>(dims[i]);
  }
  return shape;
}

// Converts a yolo_box op into a BM "user CPU" layer with two inputs
// (X, ImgSize) and two outputs (Boxes, Scores), then registers both outputs
// with the subgraph.
//
// Params:
//   ctx    - opaque pointer to the bm Graph under construction; must be
//            non-null.
//   op     - the op being converted; must be non-null.
//   kernel - not used by this converter.
//
// Returns SUCCESS once the layer has been added and both output nodes
// recorded.
int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();

  // Inputs.
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  auto img_size_var_name = op_info->Input("ImgSize").front();
  auto img_size = scope->FindVar(img_size_var_name)->GetMutable<lite::Tensor>();
  auto img_size_dims = img_size->dims();

  // Outputs.
  auto boxes_var_name = op_info->Output("Boxes").front();
  auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
  auto boxes_dims = boxes->dims();
  auto scores_var_name = op_info->Output("Scores").front();
  auto scores = scope->FindVar(scores_var_name)->GetMutable<lite::Tensor>();
  auto scores_dims = scores->dims();

  std::vector<int32_t> i_x_shape_data = YoloBoxShapeToInt32(x_dims);
  std::vector<int32_t> i_img_size_shape_data =
      YoloBoxShapeToInt32(img_size_dims);
  std::vector<int32_t> i_boxes_shape_data = YoloBoxShapeToInt32(boxes_dims);
  std::vector<int32_t> i_scores_shape_data = YoloBoxShapeToInt32(scores_dims);

  auto class_num = op_info->GetAttr<int>("class_num");
  auto downsample_ratio = op_info->GetAttr<int>("downsample_ratio");
  auto conf_thresh = op_info->GetAttr<float>("conf_thresh");
  auto anchors = op_info->GetAttr<std::vector<int>>("anchors");

  // The parameter struct stores the anchors as a raw pointer, so they are
  // copied into a heap buffer that outlives the local `anchors` vector.
  // NOTE(review): this buffer is never freed in this function; presumably the
  // BM compiler/runtime keeps using the pointer after the call - confirm who
  // owns it before adding a free().
  const size_t anchors_bytes = sizeof(int) * anchors.size();
  int* anchors_buffer = static_cast<int*>(malloc(anchors_bytes));
  CHECK(anchors_buffer != nullptr);
  // Copied exactly once; data() avoids indexing element 0 of an empty vector.
  memcpy(anchors_buffer, anchors.data(), anchors_bytes);

  // Value-initialised so that the bytes not assigned below (unused union
  // members, padding) are zero rather than indeterminate: the whole struct is
  // handed over by pointer + sizeof.
  user_cpu_param_t bm_param{};
  bm_param.op_type = USER_PADDLE_YOLO_BOX;
  bm_param.u.yolo_box_param.class_num = class_num;
  bm_param.u.yolo_box_param.downsample_ratio = downsample_ratio;
  bm_param.u.yolo_box_param.conf_thresh = conf_thresh;
  bm_param.u.yolo_box_param.anchors = anchors_buffer;
  bm_param.u.yolo_box_param.anchors_size = anchors.size();

  // Input descriptors: index 0 is X, index 1 is ImgSize.
  int32_t input_num = 2;
  int32_t* in_shape[2];
  int32_t in_dim[2];
  const char* in_name[2];
  in_shape[0] = &i_x_shape_data[0];
  in_shape[1] = &i_img_size_shape_data[0];
  in_dim[0] = x_dims.size();
  in_dim[1] = img_size_dims.size();
  in_name[0] = static_cast<const char*>(x_var_name.c_str());
  in_name[1] = static_cast<const char*>(img_size_var_name.c_str());

  // Output descriptors: index 0 is Boxes, index 1 is Scores.
  int32_t output_num = 2;
  int32_t* out_shape[2];
  int32_t out_dim[2];
  const char* out_name[2];
  out_shape[0] = &i_boxes_shape_data[0];
  out_shape[1] = &i_scores_shape_data[0];
  out_dim[0] = boxes_dims.size();
  out_dim[1] = scores_dims.size();
  out_name[0] = static_cast<const char*>(boxes_var_name.c_str());
  out_name[1] = static_cast<const char*>(scores_var_name.c_str());

  add_user_cpu_layer(graph->GetCompilerHandle(),
                     input_num,
                     in_shape,
                     in_dim,
                     in_name,
                     output_num,
                     out_shape,
                     out_dim,
                     out_name,
                     &bm_param,
                     static_cast<int>(sizeof(bm_param)));
  graph->AddNode(boxes_var_name);
  graph->AddNode(scores_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Binds the yolo_box op type to YoloBoxConverter for the kBM target.
REGISTER_SUBGRAPH_BRIDGE(yolo_box,
kBM,
paddle::lite::subgraph::bm::YoloBoxConverter);
......@@ -34,6 +34,7 @@ int SubgraphEngine::BuildDeviceProgram() {
const auto& bridges = subgraph::Registry::Instance();
graph.CreateCompilerHandle();
auto& ctx = this->ctx_->template As<BMContext>();
int kk = 0;
for (auto& inst : origin_program_) {
auto op = inst.op();
CHECK(op);
......@@ -52,7 +53,7 @@ int SubgraphEngine::BuildDeviceProgram() {
return subgraph::FAILED;
}
}
std::string net_name = "paddle_bitmain";
std::string net_name = "bmnetc_f32umodel";
__bmcompile_opt(
graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 1);
void* bmodel_data = nullptr;
......@@ -71,7 +72,7 @@ int SubgraphEngine::BuildDeviceProgram() {
origin_itensors_.resize(input_names_.size());
device_inputs_.resize(input_names_.size());
for (size_t i = 0; i < input_names_.size(); i++) {
origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
origin_itensors_[i] = scope_->FindMutableTensor(net_info_->input_names[i]);
CHECK(origin_itensors_[i]);
origin_idims_[i] = origin_itensors_[i]->dims();
bm_device_mem_t* p_mem =
......@@ -90,19 +91,15 @@ int SubgraphEngine::BuildDeviceProgram() {
origin_otensors_.resize(output_names_.size());
device_outputs_.resize(output_names_.size());
for (size_t i = 0; i < output_names_.size(); i++) {
origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
origin_otensors_[i] = scope_->FindMutableTensor(net_info_->output_names[i]);
CHECK(origin_otensors_[i]);
origin_odims_[i] = origin_otensors_[i]->dims();
output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
origin_otensors_[i]->mutable_data<float>();
}
for (size_t i = 0; i < output_names_.size(); i++) {
int mapping_index = output_map_.at(net_info_->output_names[i]);
bm_device_mem_t* p_mem =
static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
CHECK(p_mem != nullptr);
CHECK_EQ(bm_malloc_device_byte(
bm_hd_, p_mem, origin_otensors_[mapping_index]->memory_size()),
bm_hd_, p_mem, origin_otensors_[i]->memory_size()),
BM_SUCCESS);
bmrt_tensor_with_device(&device_outputs_[i],
*p_mem,
......
......@@ -51,7 +51,6 @@ class SubgraphEngine : public subgraph::Engine {
void *bmrt_hd_;
std::vector<bm_tensor_t> device_inputs_;
std::vector<bm_tensor_t> device_outputs_;
std::map<std::string, int> output_map_;
const char **net_names_;
const bm_net_info_t *net_info_;
bm_handle_t bm_hd_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册