From 3b3abcbbf21d005a05d4b12a82195c23e121104f Mon Sep 17 00:00:00 2001
From: "baolei.an"
Date: Sun, 26 Apr 2020 15:36:30 +0800
Subject: [PATCH] [LITE][BM] support faceboxes and behavior image,test=develop

---
 lite/kernels/bm/bridges/CMakeLists.txt        |   3 +-
 .../bm/bridges/density_prior_box_op.cc        | 302 ++++++++++++++++++
 lite/kernels/bm/bridges/elementwise_ops.cc    | 144 ++++++---
 lite/kernels/bm/bridges/matmul_op.cc          |  44 +--
 lite/kernels/bm/bridges/paddle_use_bridges.h  |   1 +
 5 files changed, 429 insertions(+), 65 deletions(-)
 create mode 100644 lite/kernels/bm/bridges/density_prior_box_op.cc

diff --git a/lite/kernels/bm/bridges/CMakeLists.txt b/lite/kernels/bm/bridges/CMakeLists.txt
index 57a89696c4..5e45701150 100644
--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -35,7 +35,7 @@ lite_cc_library(subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS
 lite_cc_library(subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_split_op_bm SRCS split_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS ${bm_subgraph_bridge_deps})
-
+lite_cc_library(subgraph_bridge_density_prior_box_op_bm SRCS density_prior_box_op.cc DEPS ${bm_subgraph_bridge_deps})
 
 set(bm_subgraph_bridges
         subgraph_bridge_registry
@@ -69,4 +69,5 @@ set(bm_subgraph_bridges
         subgraph_bridge_shape_op_bm
         subgraph_bridge_split_op_bm
         subgraph_bridge_matmul_op_bm
+        subgraph_bridge_density_prior_box_op_bm
         CACHE INTERNAL "bm_subgraph_bridges")
diff --git a/lite/kernels/bm/bridges/density_prior_box_op.cc b/lite/kernels/bm/bridges/density_prior_box_op.cc
new file mode 100644
index 0000000000..137c5142d5
--- /dev/null
+++ b/lite/kernels/bm/bridges/density_prior_box_op.cc
@@ -0,0 +1,302 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <bmcompiler_if.h>
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <string>
+#include <vector>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+// Attributes of the density_prior_box op used by the host-side computation.
+// Scalar members are zero-initialized: step_w/step_h are optional attributes
+// and compute_density_priorbox_kernel() reads them even when they are unset.
+typedef struct __tag_st_priorbox_param {
+  std::vector<float> fixed_sizes;
+  std::vector<float> fixed_ratios;
+  std::vector<int> densities;
+  std::vector<float> variances;
+  float step_w{0.f};
+  float step_h{0.f};
+  float offset{0.f};
+  int prior_num{0};
+  bool clip{false};
+  bool flatten_to_2d{false};
+} st_priorbox_param;
+
+// Computes the density prior boxes and their variances on the host.
+//
+// Resizes the "Boxes" and "Variances" tensors to {feature_height,
+// feature_width, num_priors, 4}, stores num_priors in param->prior_num and
+// returns a malloc'ed buffer of 2 * boxes->data_size() floats: the box
+// coordinates first, followed by the per-box variances.
+float* compute_density_priorbox_kernel(OpLite* op, st_priorbox_param* param) {
+  auto op_info = op->op_info();
+  auto scope = op->scope();
+  // inputs
+  auto in_var_name = op_info->Input("Input").front();
+  auto in = scope->FindVar(in_var_name)->GetMutable<lite::Tensor>();
+  auto in_dims = in->dims();
+  auto img_var_name = op_info->Input("Image").front();
+  auto img = scope->FindVar(img_var_name)->GetMutable<lite::Tensor>();
+  auto img_dims = img->dims();
+  // outputs
+  auto boxes_var_name = op_info->Output("Boxes").front();
+  auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
+  auto var_var_name = op_info->Output("Variances").front();
+  auto var = scope->FindVar(var_var_name)->GetMutable<lite::Tensor>();
+
+  // densities[s] is indexed with the fixed_sizes index and variances[0..3]
+  // are read unconditionally below, so reject malformed attributes up front.
+  CHECK_EQ(param->fixed_sizes.size(), param->densities.size());
+  CHECK(param->variances.size() >= 4u);
+
+  auto img_width = img_dims[3];
+  auto img_height = img_dims[2];
+  auto feature_width = in_dims[3];
+  auto feature_height = in_dims[2];
+  float step_width, step_height;
+  if (param->step_w == 0.f || param->step_h == 0.f) {
+    step_width = static_cast<float>(img_width) / feature_width;
+    step_height = static_cast<float>(img_height) / feature_height;
+  } else {
+    step_width = param->step_w;
+    step_height = param->step_h;
+  }
+  int num_priors = 0;
+  for (size_t i = 0; i < param->densities.size(); ++i) {
+    // A non-positive density would divide by zero when computing shift.
+    CHECK(param->densities[i] > 0);
+    num_priors += static_cast<int>(param->fixed_ratios.size()) *
+                  param->densities[i] * param->densities[i];
+  }
+  param->prior_num = num_priors;
+  DDim shape_out({feature_height, feature_width, num_priors, 4});
+  int32_t channel_size = feature_height * feature_width * num_priors * 4;
+  boxes->Resize(shape_out);
+  var->Resize(shape_out);
+  int step_average = static_cast<int>((step_width + step_height) * 0.5);
+  std::vector<float> sqrt_fixed_ratios;
+  for (size_t i = 0; i < param->fixed_ratios.size(); i++) {
+    sqrt_fixed_ratios.push_back(std::sqrt(param->fixed_ratios[i]));
+  }
+  // One buffer holds both outputs: boxes first, variances second.
+  float* cpu_data =
+      static_cast<float*>(malloc(sizeof(float) * boxes->data_size() * 2));
+  CHECK(cpu_data != nullptr);
+  float* b_t = cpu_data;
+  for (int h = 0; h < feature_height; ++h) {
+    for (int w = 0; w < feature_width; ++w) {
+      float center_x = (w + param->offset) * step_width;
+      float center_y = (h + param->offset) * step_height;
+
+      for (size_t s = 0; s < param->fixed_sizes.size(); ++s) {
+        auto fixed_size = param->fixed_sizes[s];
+        int density = param->densities[s];
+        int shift = step_average / density;
+        // Generate density prior boxes with fixed ratios.
+        for (size_t r = 0; r < param->fixed_ratios.size(); ++r) {
+          float box_width_ratio = fixed_size * sqrt_fixed_ratios[r];
+          float box_height_ratio = fixed_size / sqrt_fixed_ratios[r];
+          float density_center_x = center_x - step_average / 2. + shift / 2.;
+          float density_center_y = center_y - step_average / 2. + shift / 2.;
+          for (int di = 0; di < density; ++di) {
+            for (int dj = 0; dj < density; ++dj) {
+              float center_x_temp = density_center_x + dj * shift;
+              float center_y_temp = density_center_y + di * shift;
+              b_t[0] = std::max(
+                  (center_x_temp - box_width_ratio / 2.) / img_width, 0.);
+              b_t[1] = std::max(
+                  (center_y_temp - box_height_ratio / 2.) / img_height, 0.);
+              b_t[2] = std::min(
+                  (center_x_temp + box_width_ratio / 2.) / img_width, 1.);
+              b_t[3] = std::min(
+                  (center_y_temp + box_height_ratio / 2.) / img_height, 1.);
+              b_t += 4;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (param->clip) {
+    for (int32_t d = 0; d < channel_size; ++d) {
+      cpu_data[d] = std::min(std::max(cpu_data[d], 0.f), 1.f);
+    }
+  }
+  float* ptr = cpu_data + channel_size;
+  int count = 0;
+  for (int32_t h = 0; h < feature_height; ++h) {
+    for (int32_t w = 0; w < feature_width; ++w) {
+      for (int32_t i = 0; i < param->prior_num; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          ptr[count] = param->variances[j];
+          ++count;
+        }
+      }
+    }
+  }
+  return cpu_data;
+}
+
+// Bridge for density_prior_box: the boxes and variances are computed on the
+// host by compute_density_priorbox_kernel(), handed to add_priorbox_layer()
+// as one buffer, split into two tensors with add_tf_split_layer() and
+// reshaped to the dims of the "Boxes" output under the "Boxes" and
+// "Variances" variable names.
+int DensityPriorBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  // inputs
+  auto in_var_name = op_info->Input("Input").front();
+  auto in = scope->FindVar(in_var_name)->GetMutable<lite::Tensor>();
+  auto in_dims = in->dims();
+  auto img_var_name = op_info->Input("Image").front();
+  auto img = scope->FindVar(img_var_name)->GetMutable<lite::Tensor>();
+  auto img_dims = img->dims();
+  std::vector<int32_t> i_input_shape_data(in_dims.size());
+  for (size_t i = 0; i < in_dims.size(); i++) {
+    i_input_shape_data[i] = static_cast<int32_t>(in_dims[i]);
+  }
+  // outputs
+  auto boxes_var_name = op_info->Output("Boxes").front();
+  auto boxes = scope->FindVar(boxes_var_name)->GetMutable<lite::Tensor>();
+  auto var_var_name = op_info->Output("Variances").front();
+  // param
+  st_priorbox_param param;
+  param.clip = op_info->GetAttr<bool>("clip");
+  param.flatten_to_2d = op_info->GetAttr<bool>("flatten_to_2d");
+  param.fixed_sizes = op_info->GetAttr<std::vector<float>>("fixed_sizes");
+  param.fixed_ratios = op_info->GetAttr<std::vector<float>>("fixed_ratios");
+  param.variances = op_info->GetAttr<std::vector<float>>("variances");
+  param.densities = op_info->GetAttr<std::vector<int>>("densities");
+
+  param.offset = op_info->GetAttr<float>("offset");
+  if (op_info->HasAttr("step_w")) {
+    param.step_w = op_info->GetAttr<float>("step_w");
+  }
+  if (op_info->HasAttr("step_h")) {
+    param.step_h = op_info->GetAttr<float>("step_h");
+  }
+  float* cpu_data = compute_density_priorbox_kernel(op, &param);
+  auto boxes_dims = boxes->dims();
+  std::vector<int32_t> i_pri_out_shape_data(3);
+  i_pri_out_shape_data[0] = 1;
+  i_pri_out_shape_data[1] = 2;
+  i_pri_out_shape_data[2] = boxes->data_size();
+  auto bm_priorbox_name = lite::subgraph::bm::UniqueName("bm_priorbox");
+  add_priorbox_layer(graph->GetCompilerHandle(),
+                     const_cast<const int*>(&i_input_shape_data[0]),
+                     in_dims.size(),
+                     static_cast<const char*>(in_var_name.c_str()),
+                     const_cast<const int*>(&i_pri_out_shape_data[0]),
+                     3,
+                     static_cast<const char*>(bm_priorbox_name.c_str()),
+                     static_cast<const float*>(cpu_data),
+                     0,
+                     nullptr,
+                     0,
+                     nullptr,
+                     0,
+                     nullptr,
+                     0,
+                     0,
+                     0,
+                     nullptr,
+                     0,
+                     0,
+                     0.f,
+                     0.f,
+                     0.f);
+  int32_t* shape[2];
+  int32_t dim[2];
+  const char* name[2];
+  int32_t dim_size = 3;
+  dim[0] = dim_size;
+  dim[1] = dim_size;
+  std::vector<int32_t> i_split_shape_data(dim_size);
+  for (int32_t i = 0; i < dim_size; i++) {
+    i_split_shape_data[i] = i_pri_out_shape_data[i];
+  }
+  i_split_shape_data[1] /= 2;
+  shape[0] = &i_split_shape_data[0];
+  shape[1] = &i_split_shape_data[0];
+  // Keep the generated names alive in named strings: name[] only stores raw
+  // c_str() pointers, which would dangle if taken from temporaries.
+  const std::string boxes_split_name =
+      lite::subgraph::bm::UniqueName("bm_boxes");
+  const std::string var_split_name =
+      lite::subgraph::bm::UniqueName("bm_boxes_var");
+  name[0] = boxes_split_name.c_str();
+  name[1] = var_split_name.c_str();
+  int split_size[2];
+  split_size[0] = shape[0][1];
+  split_size[1] = shape[1][1];
+  add_tf_split_layer(graph->GetCompilerHandle(),
+                     const_cast<const int*>(&i_pri_out_shape_data[0]),
+                     3,
+                     static_cast<const char*>(bm_priorbox_name.c_str()),
+                     2,
+                     shape,
+                     dim,
+                     name,
+                     3,
+                     1,
+                     split_size,
+                     2);
+  // final output
+  std::vector<int32_t> i_output_shape_data(boxes_dims.size());
+  for (size_t i = 0; i < boxes_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int32_t>(boxes_dims[i]);
+  }
+  add_reshape_layer_v2(graph->GetCompilerHandle(),
+                       name[0],
+                       shape[0],
+                       3,
+                       static_cast<const char*>(boxes_var_name.c_str()),
+                       const_cast<const int*>(&i_output_shape_data[0]),
+                       boxes_dims.size());
+  add_reshape_layer_v2(graph->GetCompilerHandle(),
+                       name[1],
+                       shape[1],
+                       3,
+                       static_cast<const char*>(var_var_name.c_str()),
+                       const_cast<const int*>(&i_output_shape_data[0]),
+                       boxes_dims.size());
+  graph->AddNode(boxes_var_name);
+  graph->AddNode(var_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(density_prior_box,
+                         kBM,
+                         paddle::lite::subgraph::bm::DensityPriorBoxConverter);
diff --git a/lite/kernels/bm/bridges/elementwise_ops.cc b/lite/kernels/bm/bridges/elementwise_ops.cc
index 3006a8b6fd..4104ad0451 100644
--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -24,6 +24,57 @@ namespace lite {
 namespace subgraph {
 namespace bm {
 
+// Evaluates an elementwise mul/add/sub/div on the host for the case where
+// both operands are constants (neither is a node of the graph). Returns a
+// malloc'ed buffer holding one float per element of the "Out" tensor.
+float* compute_elementwise_both_const(OpLite* op) {
+  auto op_info = op->op_info();
+  auto scope = op->scope();
+  auto op_type = op_info->Type();
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  auto y_var_name = op_info->Input("Y").front();
+  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
+  auto y_dims = y->dims();
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  float* cpu_data =
+      static_cast<float*>(malloc(sizeof(float) * output->data_size()));
+  CHECK(cpu_data != nullptr);
+  CHECK_EQ(x_dims.size(), y_dims.size());
+  // The loops below index X and Y with the output index (no broadcasting),
+  // so all three tensors must hold the same number of elements.
+  CHECK_EQ(x->data_size(), output->data_size());
+  CHECK_EQ(y->data_size(), output->data_size());
+  const float* y_data = const_cast<const float*>(y->mutable_data<float>());
+  const float* x_data = const_cast<const float*>(x->mutable_data<float>());
+  if (op_type == "elementwise_mul") {
+    for (size_t i = 0; i < output->data_size(); i++) {
+      cpu_data[i] = x_data[i] * y_data[i];
+    }
+  } else if (op_type == "elementwise_add") {
+    for (size_t i = 0; i < output->data_size(); i++) {
+      cpu_data[i] = x_data[i] + y_data[i];
+    }
+  } else if (op_type == "elementwise_sub") {
+    for (size_t i = 0; i < output->data_size(); i++) {
+      cpu_data[i] = x_data[i] - y_data[i];
+    }
+  } else if (op_type == "elementwise_div") {
+    for (size_t i = 0; i < output->data_size(); i++) {
+      cpu_data[i] = x_data[i] / y_data[i];
+    }
+  } else {
+    LOG(FATAL) << "[BM] unsupported elementwise op type: " << op_type;
+  }
+  return cpu_data;
+}
+
 int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
@@ -41,21 +92,20 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x_dims = x->dims();
   name[0] = static_cast<const char*>(x_var_name.c_str());
   dim[0] = x_dims.size();
-  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
   std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
-    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+    i_x_shape_data[i] = static_cast<int>(x_dims[i]);
   }
   shape[0] = &i_x_shape_data[0];
+  bool x_is_const = !graph->HasNode(x_var_name);
   auto y_var_name = op_info->Input("Y").front();
   auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
   auto y_dims = y->dims();
   name[1] = static_cast<const char*>(y_var_name.c_str());
   dim[1] = y_dims.size();
-  const int64_t* y_shape_data = const_cast<const int64_t*>(&y_dims.data()[0]);
   std::vector<int32_t> i_y_shape_data(y_dims.size());
   for (size_t i = 0; i < y_dims.size(); i++) {
-    i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
+    i_y_shape_data[i] = static_cast<int>(y_dims[i]);
   }
   shape[1] = &i_y_shape_data[0];
   bool y_is_const = !graph->HasNode(y_var_name);
@@ -86,46 +136,56 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   const float* x_data = const_cast<const float*>(x->mutable_data<float>());
   auto unique_op_name = lite::subgraph::bm::UniqueName("expand_ndims");
   std::vector<int32_t> i_expand_shape_data(3);
-  if (y_is_const) {
-    if (dim[0] == dim[1] || 2 == dim[0]) {
-      bm_add_const_tensor(graph->GetCompilerHandle(),
-                          name[1],
-                          shape[1],
-                          dim[1],
-                          static_cast<bm_data_type_t>(DTYPE_FP32),
-                          static_cast<const void*>(y_data));
-    } else if (1 == dim[1] && 1 == axis) {
-      add_expand_ndims_layer(graph->GetCompilerHandle(),
-                             name[1],
-                             shape[1],
-                             dim[1],
-                             static_cast<const float*>(y_data),
-                             -1,
-                             2,
-                             static_cast<const char*>(unique_op_name.c_str()));
-      name[1] = static_cast<const char*>(unique_op_name.c_str());
-      dim[1] = 3;
-      i_expand_shape_data[0] = i_y_shape_data[0];
-      i_expand_shape_data[1] = 1;
-      i_expand_shape_data[2] = 1;
-      shape[1] = &i_expand_shape_data[0];
-      y_data = nullptr;
+  if (x_is_const && y_is_const) {
+    float* cpu_data = compute_elementwise_both_const(op);
+    bm_add_const_tensor(graph->GetCompilerHandle(),
+                        static_cast<const char*>(output_var_name.c_str()),
+                        const_cast<const int*>(&i_output_shape_data[0]),
+                        output_dims.size(),
+                        static_cast<bm_data_type_t>(DTYPE_FP32),
+                        static_cast<const void*>(cpu_data));
+  } else {
+    if (y_is_const) {
+      if (dim[0] == dim[1] || 2 == dim[0]) {
+        bm_add_const_tensor(graph->GetCompilerHandle(),
+                            name[1],
+                            shape[1],
+                            dim[1],
+                            static_cast<bm_data_type_t>(DTYPE_FP32),
+                            static_cast<const void*>(y_data));
+      } else if (1 == dim[1] && 1 == axis) {
+        add_expand_ndims_layer(
+            graph->GetCompilerHandle(),
+            name[1],
+            shape[1],
+            dim[1],
+            static_cast<const float*>(y_data),
+            -1,
+            2,
+            static_cast<const char*>(unique_op_name.c_str()));
+        name[1] = static_cast<const char*>(unique_op_name.c_str());
+        dim[1] = 3;
+        i_expand_shape_data[0] = i_y_shape_data[0];
+        i_expand_shape_data[1] = 1;
+        i_expand_shape_data[2] = 1;
+        shape[1] = &i_expand_shape_data[0];
+        y_data = nullptr;
+      }
     }
+    add_binary_layer_v2(graph->GetCompilerHandle(),
+                        name[0],
+                        shape[0],
+                        dim[0],
+                        0,
+                        static_cast<const float*>(x_data),
+                        name[1],
+                        shape[1],
+                        dim[1],
+                        0,
+                        static_cast<const float*>(y_data),
+                        static_cast<const char*>(output_var_name.c_str()),
+                        op_code);
   }
-  add_binary_layer_v2(graph->GetCompilerHandle(),
-                      name[0],
-                      shape[0],
-                      dim[0],
-                      0,
-                      static_cast<const float*>(x_data),
-                      name[1],
-                      shape[1],
-                      dim[1],
-                      0,
-                      static_cast<const float*>(y_data),
-                      static_cast<const char*>(output_var_name.c_str()),
-                      op_code);
-
   delete[] shape;
   delete[] name;
   delete[] dim;
diff --git a/lite/kernels/bm/bridges/matmul_op.cc b/lite/kernels/bm/bridges/matmul_op.cc
index 7767b4e6b0..ca8a31d8a0 100644
--- a/lite/kernels/bm/bridges/matmul_op.cc
+++ b/lite/kernels/bm/bridges/matmul_op.cc
@@ -36,46 +36,46 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
   auto x_dims = x->dims();
-  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
   std::vector<int32_t> i_x_shape_data(x_dims.size());
   for (size_t i = 0; i < x_dims.size(); i++) {
-    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+    i_x_shape_data[i] = static_cast<int>(x_dims[i]);
   }
   auto y_var_name = op_info->Input("Y").front();
   auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
   auto y_dims = y->dims();
-  const int64_t* y_shape_data = const_cast<const int64_t*>(&y_dims.data()[0]);
   std::vector<int32_t> i_y_shape_data(y_dims.size());
   for (size_t i = 0; i < y_dims.size(); i++) {
-    i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
+    i_y_shape_data[i] = static_cast<int>(y_dims[i]);
   }
   // output
   auto output_var_name = op_info->Output("Out").front();
+  auto out = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto out_dims = out->dims();
+  std::vector<int32_t> i_out_shape_data(out_dims.size());
+  for (size_t i = 0; i < out_dims.size(); i++) {
+    i_out_shape_data[i] = static_cast<int32_t>(out_dims[i]);
+  }
   bool transpose_x = op_info->GetAttr<bool>("transpose_X");
   bool transpose_y = op_info->GetAttr<bool>("transpose_Y");
   float alpha = op_info->GetAttr<float>("alpha");
+  CHECK_EQ(alpha, 1.f);
+  CHECK_EQ(transpose_x, 0);
+  CHECK_EQ(transpose_y, 0);
 
-  LOG(INFO) << x_dims << " " << y_dims << " " << alpha << " " << transpose_x
-            << " " << transpose_y;
-
-#if 0
-  add_const_binary_layer(graph->GetCompilerHandle(),
+  const float* y_data = const_cast<const float*>(y->mutable_data<float>());
+  const float* x_data = const_cast<const float*>(x->mutable_data<float>());
+  add_batch_matmul_layer(graph->GetCompilerHandle(),
                          static_cast<const char*>(x_var_name.c_str()),
                          const_cast<const int*>(&i_x_shape_data[0]),
                          x_dims.size(),
-                         scale,
-                         static_cast<const char*>(unique_op_scale_name.c_str()),
-                         BINARY_MUL,
-                         0);
-  add_const_binary_layer(graph->GetCompilerHandle(),
-                         static_cast<const char*>(unique_op_scale_name.c_str()),
-                         const_cast<const int*>(&i_x_shape_data[0]),
-                         x_dims.size(),
-                         bias,
-                         static_cast<const char*>(output_var_name.c_str()),
-                         BINARY_ADD,
-                         0);
-#endif
+                         0,
+                         x_data,
+                         static_cast<const char*>(y_var_name.c_str()),
+                         const_cast<const int*>(&i_y_shape_data[0]),
+                         y_dims.size(),
+                         0,
+                         y_data,
+                         static_cast<const char*>(output_var_name.c_str()));
   graph->AddNode(output_var_name);
   return SUCCESS;
 }
diff --git a/lite/kernels/bm/bridges/paddle_use_bridges.h b/lite/kernels/bm/bridges/paddle_use_bridges.h
index bb6003026d..6b8325477b 100644
--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -60,3 +60,4 @@ USE_SUBGRAPH_BRIDGE(split, kBM);
 USE_SUBGRAPH_BRIDGE(matmul, kBM);
 USE_SUBGRAPH_BRIDGE(max_pool2d_with_index, kBM);
 USE_SUBGRAPH_BRIDGE(sigmoid, kBM);
+USE_SUBGRAPH_BRIDGE(density_prior_box, kBM);
-- 
GitLab