Commit 3b3abcbb authored by baolei.an

[LITE][BM] support faceboxes and behavior image,test=develop

Parent 1fe164fd
......@@ -35,7 +35,7 @@ lite_cc_library(subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS
lite_cc_library(subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_split_op_bm SRCS split_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_density_prior_box_op_bm SRCS density_prior_box_op.cc DEPS ${bm_subgraph_bridge_deps})
set(bm_subgraph_bridges
subgraph_bridge_registry
......@@ -69,4 +69,5 @@ set(bm_subgraph_bridges
subgraph_bridge_shape_op_bm
subgraph_bridge_split_op_bm
subgraph_bridge_matmul_op_bm
subgraph_bridge_density_prior_box_op_bm
CACHE INTERNAL "bm_subgraph_bridges")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
typedef struct __tag_st_priorbox_param {
std::vector<float> fixed_sizes;
std::vector<float> fixed_ratios;
std::vector<int> densities;
std::vector<float> variances;
// Default to 0 so the kernel falls back to the feature-map-derived step when
// the step_w / step_h attributes are absent.
float step_w{0.f};
float step_h{0.f};
float offset;
int prior_num;
bool clip;
bool flatten_to_2d;
} st_priorbox_param;
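// Computes the density prior boxes on the host. The returned buffer holds
// feature_h * feature_w * prior_num * 4 box coordinates followed by the same
// number of variance values; the converter feeds it to the BM compiler as
// constant data.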
float* compute_density_priorbox_kernel(OpLite* op, st_priorbox_param* param) {
auto op_info = op->op_info();
auto scope = op->scope();
// inputs
auto in_var_name = op_info->Input("Input").front();
auto in = scope->FindVar(in_var_name)->GetMutable<lite::Tensor>();
auto in_dims = in->dims();
auto img_var_name = op_info->Input("Image").front();
auto img = scope->FindVar(img_var_name)->GetMutable<lite::Tensor>();
auto img_dims = img->dims();
// outputs
auto boxes_var_name = op_info->Output("Boxes").front();
auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
auto var_var_name = op_info->Output("Variances").front();
auto var = scope->FindVar(var_var_name)->GetMutable<lite::Tensor>();
auto img_width = img_dims[3];
auto img_height = img_dims[2];
auto feature_width = in_dims[3];
auto feature_height = in_dims[2];
float step_width, step_height;
if (param->step_w == 0.f || param->step_h == 0.f) {
step_width = static_cast<float>(img_width) / feature_width;
step_height = static_cast<float>(img_height) / feature_height;
} else {
step_width = param->step_w;
step_height = param->step_h;
}
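// Each density value d contributes d * d anchor centers per fixed ratio, so
// the prior count per feature-map cell is sum_i(fixed_ratios.size() * densities[i]^2).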
int num_priors = 0;
for (size_t i = 0; i < param->densities.size(); ++i) {
num_priors += (param->fixed_ratios.size()) * (pow(param->densities[i], 2));
}
param->prior_num = num_priors;
DDim shape_out({feature_height, feature_width, num_priors, 4});
int32_t channel_size = feature_height * feature_width * num_priors * 4;
boxes->Resize(shape_out);
var->Resize(shape_out);
int step_average = static_cast<int>((step_width + step_height) * 0.5);
std::vector<float> sqrt_fixed_ratios;
for (size_t i = 0; i < param->fixed_ratios.size(); i++) {
sqrt_fixed_ratios.push_back(sqrt(param->fixed_ratios[i]));
}
float* cpu_data =
static_cast<float*>(malloc(sizeof(float) * boxes->data_size() * 2));
CHECK(cpu_data != nullptr);
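// Layout of cpu_data: the first channel_size floats are the box coordinates
// (optionally clipped to [0, 1]), the second channel_size floats are the
// per-box variances filled in below.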
float* b_t = cpu_data;
for (int h = 0; h < feature_height; ++h) {
for (int w = 0; w < feature_width; ++w) {
float center_x = (w + param->offset) * step_width;
float center_y = (h + param->offset) * step_height;
for (size_t s = 0; s < param->fixed_sizes.size(); ++s) {
auto fixed_size = param->fixed_sizes[s];
int density = param->densities[s];
int shift = step_average / density;
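// Each cell is subdivided into a density x density grid; `shift` is the
// spacing between anchor centers inside the step_average window.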
// Generate density prior boxes with fixed ratios.
for (size_t r = 0; r < param->fixed_ratios.size(); ++r) {
float box_width_ratio = fixed_size * sqrt_fixed_ratios[r];
float box_height_ratio = fixed_size / sqrt_fixed_ratios[r];
float density_center_x = center_x - step_average / 2. + shift / 2.;
float density_center_y = center_y - step_average / 2. + shift / 2.;
for (int di = 0; di < density; ++di) {
for (int dj = 0; dj < density; ++dj) {
float center_x_temp = density_center_x + dj * shift;
float center_y_temp = density_center_y + di * shift;
b_t[0] = std::max(
(center_x_temp - box_width_ratio / 2.) / img_width, 0.);
b_t[1] = std::max(
(center_y_temp - box_height_ratio / 2.) / img_height, 0.);
b_t[2] = std::min(
(center_x_temp + box_width_ratio / 2.) / img_width, 1.);
b_t[3] = std::min(
(center_y_temp + box_height_ratio / 2.) / img_height, 1.);
b_t += 4;
}
}
}
}
}
}
if (param->clip) {
for (int32_t d = 0; d < channel_size; ++d) {
cpu_data[d] = std::min(std::max(cpu_data[d], 0.f), 1.f);
}
}
float* ptr = cpu_data + channel_size;
int count = 0;
for (int32_t h = 0; h < feature_height; ++h) {
for (int32_t w = 0; w < feature_width; ++w) {
for (int32_t i = 0; i < param->prior_num; ++i) {
for (int j = 0; j < 4; ++j) {
ptr[count] = param->variances[j];
++count;
}
}
}
}
return cpu_data;
}
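// Converter flow: precompute the priors on the host, pass them to a BM
// priorbox layer as constant data, split the layer output into boxes and
// variances, and reshape both halves to the op's output dims.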
int DensityPriorBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto scope = op->scope();
auto op_info = op->op_info();
auto op_type = op_info->Type();
// inputs
auto in_var_name = op_info->Input("Input").front();
auto in = scope->FindVar(in_var_name)->GetMutable<lite::Tensor>();
auto in_dims = in->dims();
auto img_var_name = op_info->Input("Image").front();
auto img = scope->FindVar(img_var_name)->GetMutable<lite::Tensor>();
auto img_dims = img->dims();
std::vector<int32_t> i_input_shape_data(in_dims.size());
for (size_t i = 0; i < in_dims.size(); i++) {
i_input_shape_data[i] = static_cast<int32_t>(in_dims[i]);
}
// outputs
auto boxes_var_name = op_info->Output("Boxes").front();
auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
auto var_var_name = op_info->Output("Variances").front();
// param
st_priorbox_param param;
param.clip = op_info->GetAttr<bool>("clip");
param.flatten_to_2d = op_info->GetAttr<bool>("flatten_to_2d");
param.fixed_sizes = op_info->GetAttr<std::vector<float>>("fixed_sizes");
param.fixed_ratios = op_info->GetAttr<std::vector<float>>("fixed_ratios");
param.variances = op_info->GetAttr<std::vector<float>>("variances");
param.densities = op_info->GetAttr<std::vector<int>>("densities");
param.offset = op_info->GetAttr<float>("offset");
if (op_info->HasAttr("step_w")) {
param.step_w = op_info->GetAttr<float>("step_w");
}
if (op_info->HasAttr("step_h")) {
param.step_h = op_info->GetAttr<float>("step_h");
}
float* cpu_data = compute_density_priorbox_kernel(op, &param);
auto boxes_dims = boxes->dims();
std::vector<int32_t> i_pri_out_shape_data(3);
i_pri_out_shape_data[0] = 1;
i_pri_out_shape_data[1] = 2;
i_pri_out_shape_data[2] = boxes->data_size();
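// The priorbox layer emits a single [1, 2, N] tensor (N = boxes->data_size());
// index 0 along dim 1 holds the boxes, index 1 the variances.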
auto bm_priorbox_name = lite::subgraph::bm::UniqueName("bm_priorbox");
add_priorbox_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_input_shape_data[0]),
in_dims.size(),
static_cast<const char*>(in_var_name.c_str()),
const_cast<const int*>(&i_pri_out_shape_data[0]),
3,
static_cast<const char*>(bm_priorbox_name.c_str()),
static_cast<const float*>(cpu_data),
0,
nullptr,
0,
nullptr,
0,
nullptr,
0,
0,
0,
nullptr,
0,
0,
0.f,
0.f,
0.f);
int32_t* shape[2];
int32_t dim[2];
const char* name[2];
int32_t dim_size = 3;
dim[0] = dim_size;
dim[1] = dim_size;
std::vector<int32_t> i_split_shape_data(dim_size);
for (int32_t i = 0; i < dim_size; i++) {
i_split_shape_data[i] = i_pri_out_shape_data[i];
}
i_split_shape_data[1] /= 2;
shape[0] = &i_split_shape_data[0];
shape[1] = &i_split_shape_data[0];
// Keep the unique names alive in local strings; calling c_str() on the
// temporary returned by UniqueName() would leave dangling pointers.
auto bm_boxes_name = lite::subgraph::bm::UniqueName("bm_boxes");
auto bm_boxes_var_name = lite::subgraph::bm::UniqueName("bm_boxes_var");
name[0] = static_cast<const char*>(bm_boxes_name.c_str());
name[1] = static_cast<const char*>(bm_boxes_var_name.c_str());
int split_size[2];
split_size[0] = shape[0][1];
split_size[1] = shape[1][1];
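// Split the [1, 2, N] tensor along dim 1 into two [1, 1, N] halves: boxes
// first, variances second.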
add_tf_split_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_pri_out_shape_data[0]),
3,
static_cast<const char*>(bm_priorbox_name.c_str()),
2,
shape,
dim,
name,
3,
1,
split_size,
2);
// final output
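// Reshape each [1, 1, N] half back to the 4-D dims computed by the host
// kernel; boxes and variances share the same output shape.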
std::vector<int32_t> i_output_shape_data(boxes_dims.size());
for (size_t i = 0; i < boxes_dims.size(); i++) {
i_output_shape_data[i] = static_cast<int32_t>(boxes_dims[i]);
}
add_reshape_layer_v2(graph->GetCompilerHandle(),
name[0],
shape[0],
3,
static_cast<const char*>(boxes_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
boxes_dims.size());
add_reshape_layer_v2(graph->GetCompilerHandle(),
name[1],
shape[1],
3,
static_cast<const char*>(var_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
boxes_dims.size());
graph->AddNode(boxes_var_name);
graph->AddNode(var_var_name);
return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(density_prior_box,
kBM,
paddle::lite::subgraph::bm::DensityPriorBoxConverter);
......@@ -24,6 +24,48 @@ namespace lite {
namespace subgraph {
namespace bm {
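// When both inputs are compile-time constants, fold the elementwise op on the
// host and return the result so it can be registered as a constant tensor.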
float* compute_elementwise_both_const(OpLite* op) {
auto op_info = op->op_info();
auto scope = op->scope();
auto op_type = op_info->Type();
// input
auto x_var_name = op_info->Input("X").front();
auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
auto x_dims = x->dims();
auto y_var_name = op_info->Input("Y").front();
auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
auto y_dims = y->dims();
// output
auto output_var_name = op_info->Output("Out").front();
auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
auto output_dims = output->dims();
float* cpu_data =
static_cast<float*>(malloc(sizeof(float) * output->data_size()));
CHECK(cpu_data != nullptr);
CHECK_EQ(x_dims.size(), y_dims.size());
const float* y_data = const_cast<const float*>(y->mutable_data<float>());
const float* x_data = const_cast<const float*>(x->mutable_data<float>());
if (op_type == "elementwise_mul") {
for (size_t i = 0; i < output->data_size(); i++) {
cpu_data[i] = x_data[i] * y_data[i];
}
} else if (op_type == "elementwise_add") {
for (size_t i = 0; i < output->data_size(); i++) {
cpu_data[i] = x_data[i] + y_data[i];
}
} else if (op_type == "elementwise_sub") {
for (size_t i = 0; i < output->data_size(); i++) {
cpu_data[i] = x_data[i] - y_data[i];
}
} else if (op_type == "elementwise_div") {
for (size_t i = 0; i < output->data_size(); i++) {
cpu_data[i] = x_data[i] / y_data[i];
}
}
return cpu_data;
}
int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
......@@ -41,21 +83,20 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_dims = x->dims();
name[0] = static_cast<const char*>(x_var_name.c_str());
dim[0] = x_dims.size();
const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
std::vector<int32_t> i_x_shape_data(x_dims.size());
for (size_t i = 0; i < x_dims.size(); i++) {
i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
i_x_shape_data[i] = static_cast<int>(x_dims[i]);
}
shape[0] = &i_x_shape_data[0];
bool x_is_const = !graph->HasNode(x_var_name);
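// An input without a corresponding graph node is treated as constant data
// read from the scope.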
auto y_var_name = op_info->Input("Y").front();
auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
auto y_dims = y->dims();
name[1] = static_cast<const char*>(y_var_name.c_str());
dim[1] = y_dims.size();
const int64_t* y_shape_data = const_cast<const int64_t*>(&y_dims.data()[0]);
std::vector<int32_t> i_y_shape_data(y_dims.size());
for (size_t i = 0; i < y_dims.size(); i++) {
i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
i_y_shape_data[i] = static_cast<int>(y_dims[i]);
}
shape[1] = &i_y_shape_data[0];
bool y_is_const = !graph->HasNode(y_var_name);
......@@ -86,46 +127,56 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const float* x_data = const_cast<const float*>(x->mutable_data<float>());
auto unique_op_name = lite::subgraph::bm::UniqueName("expand_ndims");
std::vector<int32_t> i_expand_shape_data(3);
if (y_is_const) {
if (dim[0] == dim[1] || 2 == dim[0]) {
bm_add_const_tensor(graph->GetCompilerHandle(),
name[1],
shape[1],
dim[1],
static_cast<bm_data_type_t>(DTYPE_FP32),
static_cast<const void*>(y_data));
} else if (1 == dim[1] && 1 == axis) {
add_expand_ndims_layer(graph->GetCompilerHandle(),
name[1],
shape[1],
dim[1],
static_cast<const float*>(y_data),
-1,
2,
static_cast<const char*>(unique_op_name.c_str()));
name[1] = static_cast<const char*>(unique_op_name.c_str());
dim[1] = 3;
i_expand_shape_data[0] = i_y_shape_data[0];
i_expand_shape_data[1] = 1;
i_expand_shape_data[2] = 1;
shape[1] = &i_expand_shape_data[0];
y_data = nullptr;
if (x_is_const && y_is_const) {
float* cpu_data = compute_elementwise_both_const(op);
bm_add_const_tensor(graph->GetCompilerHandle(),
static_cast<const char*>(output_var_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
output_dims.size(),
static_cast<bm_data_type_t>(DTYPE_FP32),
static_cast<const void*>(cpu_data));
} else {
if (y_is_const) {
if (dim[0] == dim[1] || 2 == dim[0]) {
bm_add_const_tensor(graph->GetCompilerHandle(),
name[1],
shape[1],
dim[1],
static_cast<bm_data_type_t>(DTYPE_FP32),
static_cast<const void*>(y_data));
} else if (1 == dim[1] && 1 == axis) {
add_expand_ndims_layer(
graph->GetCompilerHandle(),
name[1],
shape[1],
dim[1],
static_cast<const float*>(y_data),
-1,
2,
static_cast<const char*>(unique_op_name.c_str()));
name[1] = static_cast<const char*>(unique_op_name.c_str());
dim[1] = 3;
i_expand_shape_data[0] = i_y_shape_data[0];
i_expand_shape_data[1] = 1;
i_expand_shape_data[2] = 1;
shape[1] = &i_expand_shape_data[0];
y_data = nullptr;
}
}
add_binary_layer_v2(graph->GetCompilerHandle(),
name[0],
shape[0],
dim[0],
0,
static_cast<const float*>(x_data),
name[1],
shape[1],
dim[1],
0,
static_cast<const float*>(y_data),
static_cast<const char*>(output_var_name.c_str()),
op_code);
}
add_binary_layer_v2(graph->GetCompilerHandle(),
name[0],
shape[0],
dim[0],
0,
static_cast<const float*>(x_data),
name[1],
shape[1],
dim[1],
0,
static_cast<const float*>(y_data),
static_cast<const char*>(output_var_name.c_str()),
op_code);
delete[] shape;
delete[] name;
delete[] dim;
......
......@@ -36,46 +36,46 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_var_name = op_info->Input("X").front();
auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
auto x_dims = x->dims();
const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
std::vector<int32_t> i_x_shape_data(x_dims.size());
for (size_t i = 0; i < x_dims.size(); i++) {
i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
i_x_shape_data[i] = static_cast<int>(x_dims[i]);
}
auto y_var_name = op_info->Input("Y").front();
auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
auto y_dims = y->dims();
const int64_t* y_shape_data = const_cast<const int64_t*>(&y_dims.data()[0]);
std::vector<int32_t> i_y_shape_data(y_dims.size());
for (size_t i = 0; i < y_dims.size(); i++) {
i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
i_y_shape_data[i] = static_cast<int>(y_dims[i]);
}
// output
auto output_var_name = op_info->Output("Out").front();
auto out = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
auto out_dims = out->dims();
std::vector<int32_t> i_out_shape_data(out_dims.size());
for (size_t i = 0; i < out_dims.size(); i++) {
i_out_shape_data[i] = static_cast<int>(out_dims[i]);
}
bool transpose_x = op_info->GetAttr<bool>("transpose_X");
bool transpose_y = op_info->GetAttr<bool>("transpose_Y");
float alpha = op_info->GetAttr<float>("alpha");
CHECK_EQ(alpha, 1.f);
CHECK_EQ(transpose_x, 0);
CHECK_EQ(transpose_y, 0);
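// Only the plain case is supported (alpha == 1, no transposes); the op is
// lowered to a BM batch-matmul layer below.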
LOG(INFO) << x_dims << " " << y_dims << " " << alpha << " " << transpose_x
<< " " << transpose_y;
#if 0
add_const_binary_layer(graph->GetCompilerHandle(),
const float* y_data = const_cast<const float*>(y->mutable_data<float>());
const float* x_data = const_cast<const float*>(x->mutable_data<float>());
add_batch_matmul_layer(graph->GetCompilerHandle(),
static_cast<const char*>(x_var_name.c_str()),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
scale,
static_cast<const char*>(unique_op_scale_name.c_str()),
BINARY_MUL,
0);
add_const_binary_layer(graph->GetCompilerHandle(),
static_cast<const char*>(unique_op_scale_name.c_str()),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
bias,
static_cast<const char*>(output_var_name.c_str()),
BINARY_ADD,
0);
#endif
0,
x_data,
static_cast<const char*>(y_var_name.c_str()),
const_cast<const int*>(&i_y_shape_data[0]),
y_dims.size(),
0,
y_data,
static_cast<const char*>(output_var_name.c_str()));
graph->AddNode(output_var_name);
return SUCCESS;
}
......
......@@ -60,3 +60,4 @@ USE_SUBGRAPH_BRIDGE(split, kBM);
USE_SUBGRAPH_BRIDGE(matmul, kBM);
USE_SUBGRAPH_BRIDGE(max_pool2d_with_index, kBM);
USE_SUBGRAPH_BRIDGE(sigmoid, kBM);
USE_SUBGRAPH_BRIDGE(density_prior_box, kBM);