未验证 提交 328d2da4 编写于 作者: Z zhaoying9105 提交者: GitHub

three ops: density prior box & box coder & multiclass nms (#116)

* (feat): add cnplugin cmake

* (feat): add cnplugin deps to backends/CMakeLists.txt and utility.h

* (feat): add box_coder converter

* (feat): add density_prior_box op

* (feat) add multiclass nms converter, while workspace as static tensor

* (ref): change nms test input from file to random, add iou

* (ref): add density_prior_box & box_coder to paddle_use_bridges.h
上级 53544680
...@@ -36,6 +36,12 @@ if(NOT CNRT_INC) ...@@ -36,6 +36,12 @@ if(NOT CNRT_INC)
message(FATAL_ERROR "Can not find cnrt.h in ${NEUWARE_HOME}/include") message(FATAL_ERROR "Can not find cnrt.h in ${NEUWARE_HOME}/include")
endif() endif()
find_path(CNPLUGIN_INC NAMES cnplugin.h
PATHS ${NEUWARE_HOME}/include NO_DEFAULT_PATH)
if(NOT CNPLUGIN_INC)
message(FATAL_ERROR "Can not find cnplugin.h in ${NEUWARE_HOME}/include")
endif()
include_directories("${NEUWARE_HOME}/include") include_directories("${NEUWARE_HOME}/include")
find_library(CNML_LIB_FILE NAMES cnml find_library(CNML_LIB_FILE NAMES cnml
...@@ -59,3 +65,15 @@ else() ...@@ -59,3 +65,15 @@ else()
add_library(cnrt_lib SHARED IMPORTED GLOBAL) add_library(cnrt_lib SHARED IMPORTED GLOBAL)
set_property(TARGET cnrt_lib PROPERTY IMPORTED_LOCATION ${CNRT_LIB_FILE}) set_property(TARGET cnrt_lib PROPERTY IMPORTED_LOCATION ${CNRT_LIB_FILE})
endif() endif()
find_library(CNPLUGIN_LIB_FILE NAMES cnplugin
PATHS ${NEUWARE_HOME}/lib64)
if(NOT CNPLUGIN_LIB_FILE)
message(FATAL_ERROR "Can not find CNPLUGIN Library in ${NEUWARE_HOME}/lib64")
else()
message(STATUS "Found CNPLUGIN Library: ${CNPLUGIN_LIB_FILE}")
add_library(cnplugin_lib SHARED IMPORTED GLOBAL)
set_property(TARGET cnplugin_lib PROPERTY IMPORTED_LOCATION ${CNPLUGIN_LIB_FILE})
endif()
\ No newline at end of file
...@@ -4,4 +4,4 @@ endif() ...@@ -4,4 +4,4 @@ endif()
message (STATUS "Lite with mlu backend") message (STATUS "Lite with mlu backend")
lite_cc_library(target_wrapper_mlu SRCS target_wrapper.cc DEPS cnml_lib cnrt_lib) lite_cc_library(target_wrapper_mlu SRCS target_wrapper.cc DEPS cnml_lib cnrt_lib cnplugin_lib)
...@@ -28,6 +28,9 @@ lite_cc_library(subgraph_bridge_argmax_op_mlu SRCS argmax_op.cc DEPS ${subgraph_ ...@@ -28,6 +28,9 @@ lite_cc_library(subgraph_bridge_argmax_op_mlu SRCS argmax_op.cc DEPS ${subgraph_
lite_cc_library(subgraph_bridge_squeeze_op_mlu SRCS squeeze_op.cc DEPS ${subgraph_bridge_deps_mlu}) lite_cc_library(subgraph_bridge_squeeze_op_mlu SRCS squeeze_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_reshape_op_mlu SRCS reshape_op.cc DEPS ${subgraph_bridge_deps_mlu}) lite_cc_library(subgraph_bridge_reshape_op_mlu SRCS reshape_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_flatten_op_mlu SRCS flatten_op.cc DEPS ${subgraph_bridge_deps_mlu}) lite_cc_library(subgraph_bridge_flatten_op_mlu SRCS flatten_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_box_coder_op_mlu SRCS box_coder_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_density_prior_box_op_mlu SRCS density_prior_box_op.cc DEPS ${subgraph_bridge_deps_mlu})
lite_cc_library(subgraph_bridge_multiclass_nms_mlu SRCS multiclass_nms.cc multiclass_nms_api.cc multiclass_nms_impl.o DEPS ${subgraph_bridge_deps_mlu})
set(mlu_subgraph_bridges set(mlu_subgraph_bridges
subgraph_bridge_registry subgraph_bridge_registry
subgraph_bridge_utility_mlu subgraph_bridge_utility_mlu
...@@ -52,6 +55,9 @@ set(mlu_subgraph_bridges ...@@ -52,6 +55,9 @@ set(mlu_subgraph_bridges
subgraph_bridge_squeeze_op_mlu subgraph_bridge_squeeze_op_mlu
subgraph_bridge_reshape_op_mlu subgraph_bridge_reshape_op_mlu
subgraph_bridge_flatten_op_mlu subgraph_bridge_flatten_op_mlu
subgraph_bridge_box_coder_op_mlu
subgraph_bridge_density_prior_box_op_mlu
subgraph_bridge_multiclass_nms_mlu
CACHE INTERNAL "mlu_subgraph_bridges") CACHE INTERNAL "mlu_subgraph_bridges")
...@@ -88,6 +94,9 @@ lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimiz ...@@ -88,6 +94,9 @@ lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimiz
lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_reshape_converter_mlu SRCS reshape_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_reshape_converter_mlu SRCS reshape_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_flatten_converter_mlu SRCS flatten_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_flatten_converter_mlu SRCS flatten_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_box_coder_mlu SRCS box_coder_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_density_prior_box_mlu SRCS density_prior_box_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_multiclass_nms_op_converter_mlu SRCS multiclass_nms_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
if (LITE_BUILD_EXTRA) if (LITE_BUILD_EXTRA)
lite_cc_test(test_norm_converter_mlu SRCS norm_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_norm_converter_mlu SRCS norm_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Maps the paddle `code_type` attribute string to the CNML enum.
// "encode_center_size" selects Encode; every other string (including
// "decode_center_size") is treated as Decode.
inline cnmlBoxCodeType_t GetBoxCodeType(const std::string& type) {
  return (type == "encode_center_size") ? cnmlBoxCodeType_t::Encode
                                        : cnmlBoxCodeType_t::Decode;
}
// Converts a paddle `box_coder` op into a CNML plugin BoxCoder op and fuses
// it into the MLU subgraph held by `ctx`.
// Hard requirements (aborts via LOG(FATAL) otherwise): the optional
// PriorBoxVar input must be present, and the `axis` attribute must be set.
// `kernel` is unused by this converter.
int BoxCoderConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[MLU] Converting " + op_type + "...";
  // Variable names of the op's inputs/outputs as recorded in the op desc.
  auto Prior_box_name = op_info->Input("PriorBox").front();
  auto Target_box_name = op_info->Input("TargetBox").front();
  auto Output_box_name = op_info->Output("OutputBox").front();
  // The plugin consumes variances as a tensor input, so the (normally
  // optional) PriorBoxVar argument is mandatory for this bridge.
  std::vector<std::string> input_arg_names = op_info->InputArgumentNames();
  if (std::find(input_arg_names.begin(),
                input_arg_names.end(),
                "PriorBoxVar") == input_arg_names.end()) {
    LOG(FATAL) << "box coder mlu kernel expect PriorBoxVar input" << std::endl;
  }
  auto box_var_name = op_info->Input("PriorBoxVar").front();
  auto* prior_box = scope->FindVar(Prior_box_name)->GetMutable<Tensor>();
  auto* target_box = scope->FindVar(Target_box_name)->GetMutable<Tensor>();
  auto* proposals = scope->FindVar(Output_box_name)->GetMutable<Tensor>();
  auto* box_var = scope->FindVar(box_var_name)->GetMutable<Tensor>();
  auto code_type_str = op_info->GetAttr<std::string>("code_type");
  auto box_normalized = op_info->GetAttr<bool>("box_normalized");
  int axis = -1;
  if (op_info->HasAttr("axis")) {
    axis = op_info->GetAttr<int>("axis");
  } else {
    LOG(FATAL) << "box coder mlu kernel expect axis" << std::endl;
  }
  // The shared `variance` attribute is not supported by this bridge (the
  // tensor input above is used instead); only warn and dump it for debugging.
  if (op_info->HasAttr("variance")) {
    LOG(WARNING) << "box coder mlu kernel expect not have variance attr"
                 << std::endl;
    VLOG(6) << "variance: ";
    auto variance_vec = op_info->GetAttr<std::vector<float>>("variance");
    for (size_t i = 0; i < variance_vec.size(); i++) {
      VLOG(6) << variance_vec[i];
    }
  }
  cnmlBoxCodeType_t code_type = GetBoxCodeType(code_type_str);
  // Derive the (row, col, len) geometry the plugin expects; the meaning of
  // each input's dims depends on the code type (see shape comments below).
  int row = -1;
  int len = -1;
  int col = -1;
  if (code_type == cnmlBoxCodeType_t::Encode) {
    // target_box_shape = {row, len};
    // prior_box_shape = {col, len};
    // output_shape = {row, col, len};
    row = target_box->dims()[0];
    len = target_box->dims()[1];
    col = prior_box->dims()[0];
  } else if (code_type == cnmlBoxCodeType_t::Decode) {
    // target_box_shape = {row,col,len};
    // prior_box_shape = {col, len} if axis == 0, or {row, len};
    // output_shape = {row, col, len};
    row = target_box->dims()[0];
    col = target_box->dims()[1];
    len = target_box->dims()[2];
    if (axis == 0) {
      CHECK(prior_box->dims()[0] == col);
    } else {
      CHECK(prior_box->dims()[0] == row);
    }
  }
  // Tell the plugin whether the graph runs in fp32 (vs. fp16).
  bool float32_precision = false;
  if (graph->FPType() == CNML_DATA_FLOAT32) {
    float32_precision = true;
  }
  // =================== DEBUG ======================
  VLOG(6) << "prior_box->dims(): " << prior_box->dims();
  VLOG(6) << "target_box->dims(): " << target_box->dims();
  VLOG(6) << "box_var->dims(): " << box_var->dims();
  VLOG(6) << "proposals->dims(): " << proposals->dims();
  VLOG(6) << "code_type_str: " << code_type_str;
  VLOG(6) << "col: " << col;
  VLOG(6) << "row: " << row;
  VLOG(6) << "len: " << len;
  VLOG(6) << "axis: " << axis;
  VLOG(6) << "box_normalized :" << box_normalized;
  VLOG(6) << "float32_precision: " << float32_precision;
  VLOG(6) << "Prior_box_name: " << Prior_box_name;
  VLOG(6) << "Target_box_name: " << Target_box_name;
  VLOG(6) << "Output_box_name: " << Output_box_name;
  VLOG(6) << "box_var_name: " << box_var_name;
  // =================== DEBUG END ======================
  // Inputs must already exist as graph nodes; only the output is created here.
  auto target_box_tensor = graph->GetNode(Target_box_name);
  auto prior_box_tensor = graph->GetNode(Prior_box_name);
  auto box_var_tensor = graph->GetNode(box_var_name);
  auto proposals_tensor = graph->AddNode(Output_box_name,
                                         proposals->dims().Vectorize(),
                                         CNML_TENSOR,
                                         CNML_NCHW,
                                         graph->FPType());
  cnmlPluginBoxCoderOpParam_t param;
  CNML_CALL(
      cnmlCreatePluginBoxCoderOpParam(&param,
                                      row,
                                      col,
                                      len,
                                      axis,
                                      box_normalized,
                                      float32_precision,
                                      code_type,
                                      TargetWrapperMlu::MLUCoreVersion()));
  cnmlBaseOp_t box_coder_op;
  // Plugin input order: target boxes, prior boxes, prior-box variances.
  cnmlTensor_t input_tensors[3];
  input_tensors[0] = target_box_tensor->mlu_tensor();
  input_tensors[1] = prior_box_tensor->mlu_tensor();
  input_tensors[2] = box_var_tensor->mlu_tensor();
  cnmlTensor_t output_tensors[1];
  output_tensors[0] = proposals_tensor->mlu_tensor();
  CNML_CALL(cnmlCreatePluginBoxCoderOp(
      &box_coder_op, param, input_tensors, output_tensors));
  // CNML_CALL(cnmlSetOperationComputingLayout(box_coder_op, CNML_NCHW)); //
  // important
  graph->FuseOp(box_coder_op);
  // The param struct is only needed during op creation; release it now.
  cnmlDestroyPluginBoxCoderOpParam(&param);
  return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(box_coder,
kMLU,
paddle::lite::subgraph::mlu::BoxCoderConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/box_coder_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Debug helper: dumps every element of `tensor` to `file_name`,
// one value per line, overwriting any existing file.
void ToFile(Tensor *tensor, std::string file_name) {
  std::ostringstream buffer;
  const int total = tensor->dims().production();
  auto values = tensor->mutable_data<float>();
  for (int i = 0; i < total; ++i) {
    buffer << values[i] << std::endl;
  }
  std::ofstream of;
  of.open(file_name, std::ios::out);
  of << buffer.str();
  of.close();
}
// Maps a cnmlBoxCodeType_t back to the paddle `code_type` attribute string.
// Aborts (CHECK failure) on any other enum value.
inline std::string BoxCodeTypeToStr(cnmlBoxCodeType_t code_type) {
  if (code_type == cnmlBoxCodeType_t::Encode) {
    return "encode_center_size";
  } else if (code_type == cnmlBoxCodeType_t::Decode) {
    return "decode_center_size";
  } else {
    CHECK(false);
    // Unreachable after CHECK aborts; this return fixes the original's
    // fall-off-the-end of a non-void function (undefined behavior).
    return "";
  }
}
// Maps the paddle `code_type` attribute string to the CNML enum.
// Aborts (CHECK failure) on an unrecognized string.
inline cnmlBoxCodeType_t GetBoxCodeType(const std::string &type) {
  if (type == "encode_center_size") {
    return cnmlBoxCodeType_t::Encode;
  } else if (type == "decode_center_size") {
    return cnmlBoxCodeType_t::Decode;
  } else {
    CHECK(false);
    // Unreachable after CHECK aborts; this return fixes the original's
    // fall-off-the-end of a non-void function (undefined behavior).
    return cnmlBoxCodeType_t::Decode;
  }
}
// CPU reference for box_coder "encode_center_size".
//   target_box_data : {row, len} ground-truth boxes (x1, y1, x2, y2)
//   prior_box_data  : {col, len} prior boxes
//   output          : {row, col, len} encoded offsets
// When `normalized` is false, widths/heights get a +1 pixel adjustment.
// After encoding, each coordinate is divided by the per-prior-box variance
// (if prior_box_var_data is non-null) or by the shared `variance` vector
// (if non-empty); otherwise it is left unscaled.
// prior_box_var_shape is accepted for signature symmetry but not read.
void EncodeCenterSize(float *target_box_data,
                      float *prior_box_data,
                      float *prior_box_var_data,
                      std::vector<int64_t> target_box_shape,
                      std::vector<int64_t> prior_box_shape,
                      std::vector<int64_t> prior_box_var_shape,
                      const bool normalized,
                      const std::vector<float> variance,
                      float *output) {
  const int64_t row = target_box_shape[0];
  const int64_t col = prior_box_shape[0];
  const int64_t len = prior_box_shape[1];
  const float norm_adj = normalized ? 0.f : 1.f;
  for (int64_t i = 0; i < row; ++i) {
    for (int64_t j = 0; j < col; ++j) {
      const size_t out_idx = i * col * len + j * len;
      const float *pb = prior_box_data + j * len;
      const float *tb = target_box_data + i * len;
      // Prior box in center/size form.
      const float pw = pb[2] - pb[0] + norm_adj;
      const float ph = pb[3] - pb[1] + norm_adj;
      const float pcx = pb[0] + pw / 2;
      const float pcy = pb[1] + ph / 2;
      // Target box in center/size form.
      const float tcx = (tb[2] + tb[0]) / 2;
      const float tcy = (tb[3] + tb[1]) / 2;
      const float tw = tb[2] - tb[0] + norm_adj;
      const float th = tb[3] - tb[1] + norm_adj;
      // Encoded offsets: centers normalized by prior size, log size ratios.
      output[out_idx] = (tcx - pcx) / pw;
      output[out_idx + 1] = (tcy - pcy) / ph;
      output[out_idx + 2] = std::log(std::fabs(tw / pw));
      output[out_idx + 3] = std::log(std::fabs(th / ph));
    }
  }
  if (prior_box_var_data) {
    // Per-prior-box variances: row j of the variance tensor scales every
    // output row that uses prior box j.
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
        const size_t out_idx = i * col * len + j * len;
        const float *pv = prior_box_var_data + j * len;
        for (int k = 0; k < 4; ++k) {
          output[out_idx + k] /= pv[k];
        }
      }
    }
  } else if (!variance.empty()) {
    // Shared 4-element variance attribute.
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
        const size_t out_idx = i * col * len + j * len;
        for (int k = 0; k < 4; ++k) {
          output[out_idx + k] /= static_cast<float>(variance[k]);
        }
      }
    }
  }
}
// CPU reference for box_coder "decode_center_size".
//   target_box_data : {row, col, len} encoded offsets
//   prior_box_data  : {col, len} when axis == 0, {row, len} when axis == 1
//   output          : {row, col, len} decoded boxes (x1, y1, x2, y2)
// var_size selects the variance source:
//   2 -> per-prior-box variances read from prior_box_var_data,
//   1 -> the shared `variance` attribute vector,
//   0 -> no variances (all 1.0).
// When `normalized` is false, sizes get the +1 pixel adjustment and the
// max corner gets a matching -1.
// target_box_shape drives the loop bounds; the other shape arguments are
// accepted for signature symmetry but not read.
template <int axis, int var_size>
void DecodeCenterSize(float *target_box_data,
                      float *prior_box_data,
                      float *prior_box_var_data,
                      std::vector<int64_t> target_box_shape,
                      std::vector<int64_t> prior_box_shape,
                      std::vector<int64_t> prior_box_var_shape,
                      const bool normalized,
                      std::vector<float> variance,
                      float *output) {
  const int64_t row = target_box_shape[0];
  const int64_t col = target_box_shape[1];
  const int64_t len = target_box_shape[2];
  const float norm_adj = normalized ? 0.f : 1.f;
  for (int64_t i = 0; i < row; ++i) {
    for (int64_t j = 0; j < col; ++j) {
      const size_t offset = i * col * len + j * len;
      // Prior boxes are indexed by column when axis == 0, by row otherwise.
      const int prior_offset = (axis == 0) ? j * len : i * len;
      const float *pb = prior_box_data + prior_offset;
      const float pw = pb[2] - pb[0] + norm_adj;
      const float ph = pb[3] - pb[1] + norm_adj;
      const float pcx = pb[0] + pw / 2;
      const float pcy = pb[1] + ph / 2;
      // Default variances of 1.0 (var_size == 0 keeps these).
      float var_buf[4] = {1.f, 1.f, 1.f, 1.f};
      const float *vp = var_buf;
      if (var_size == 2) {
        std::memcpy(var_buf, prior_box_var_data + prior_offset,
                    4 * sizeof(float));
      } else if (var_size == 1) {
        vp = variance.data();
      }
      // Decode: variance-scaled offsets around the prior center/size.
      const float tcx = vp[0] * target_box_data[offset] * pw + pcx;
      const float tcy = vp[1] * target_box_data[offset + 1] * ph + pcy;
      const float tw = std::exp(vp[2] * target_box_data[offset + 2]) * pw;
      const float th = std::exp(vp[3] * target_box_data[offset + 3]) * ph;
      output[offset] = tcx - tw / 2;
      output[offset + 1] = tcy - th / 2;
      output[offset + 2] = tcx + tw / 2 - norm_adj;
      output[offset + 3] = tcy + th / 2 - norm_adj;
    }
  }
}
// CPU reference implementation of the box_coder op.
// Dispatches to EncodeCenterSize, or to the DecodeCenterSize<axis, var_size>
// specialization selected by `axis` and by which variance source is present:
//   var_size == 2 -> per-prior-box variances (box_var tensor data non-null),
//   var_size == 1 -> shared `variance` attribute vector,
//   var_size == 0 -> no variances.
// The result is written into `output_box`, which must already be sized.
void Compute(cnmlBoxCodeType_t code_type,
             lite::Tensor *prior_box,
             lite::Tensor *target_box,
             lite::Tensor *box_var,
             lite::Tensor *output_box,
             std::vector<float> variance,
             bool normalized,
             int axis) {
  auto *prior_box_data = prior_box->mutable_data<float>();
  auto *prior_box_var_data = box_var->mutable_data<float>();
  auto *target_box_data = target_box->mutable_data<float>();
  auto *output_data = output_box->mutable_data<float>();
  auto target_box_shape = target_box->dims().Vectorize();
  auto prior_box_shape = prior_box->dims().Vectorize();
  auto prior_box_var_shape = box_var->dims().Vectorize();
  if (code_type == cnmlBoxCodeType_t::Encode) {
    EncodeCenterSize(target_box_data,
                     prior_box_data,
                     prior_box_var_data,
                     target_box_shape,
                     prior_box_shape,
                     prior_box_var_shape,
                     normalized,
                     variance,
                     output_data);
  } else if (code_type == cnmlBoxCodeType_t::Decode) {
    if (prior_box_var_data) {
      LOG(INFO) << "prior_box_var_data not null" << std::endl;
      if (axis == 0) {
        // Fixed log text: this path instantiates DecodeCenterSize<0, 2>
        // (the original log wrongly claimed "<1, 2>" here).
        LOG(INFO) << "use DecodeCenterSize<0, 2> axis == 0" << std::endl;
        DecodeCenterSize<0, 2>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      } else {
        LOG(INFO) << "use DecodeCenterSize<1, 2> axis == 1" << std::endl;
        DecodeCenterSize<1, 2>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      }
    } else if (!(variance.empty())) {
      LOG(INFO) << "prior_box_var_data null" << std::endl;
      if (axis == 0) {
        DecodeCenterSize<0, 1>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      } else {
        DecodeCenterSize<1, 1>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      }
    } else {
      // No variance information at all: decode with implicit 1.0 variances.
      if (axis == 0) {
        DecodeCenterSize<0, 0>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      } else {
        DecodeCenterSize<1, 0>(target_box_data,
                               prior_box_data,
                               prior_box_var_data,
                               target_box_shape,
                               prior_box_shape,
                               prior_box_var_shape,
                               normalized,
                               variance,
                               output_data);
      }
    }
  }
}
// Runs the CPU reference implementation for the box_coder op described by
// `op`: resolves the input/output tensors and attributes from the op's scope
// and op_info, then delegates to Compute(), which writes the result into the
// OutputBox tensor.
void box_coder_ref(const std::shared_ptr<operators::BoxCoderOpLite> op) {
  Scope *scope = op->scope();
  const OpInfo *op_info = op->op_info();
  // Resolve input/output tensors from the scope by their argument names.
  auto prior_box =
      scope->FindVar(op_info->Input("PriorBox").front())->GetMutable<Tensor>();
  auto target_box =
      scope->FindVar(op_info->Input("TargetBox").front())->GetMutable<Tensor>();
  auto box_var = scope->FindVar(op_info->Input("PriorBoxVar").front())
                     ->GetMutable<Tensor>();
  auto output_box = scope->FindVar(op_info->Output("OutputBox").front())
                        ->GetMutable<Tensor>();
  auto code_type_str = op_info->GetAttr<std::string>("code_type");
  auto box_normalized = op_info->GetAttr<bool>("box_normalized");
  auto axis = op_info->GetAttr<int>("axis");
  auto code_type = GetBoxCodeType(code_type_str);
  // The variance attribute is optional; an empty vector means "not set".
  std::vector<float> variance;
  if (op_info->HasAttr("variance")) {
    variance = op_info->GetAttr<std::vector<float>>("variance");
  }
  Compute(code_type,
          prior_box,
          target_box,
          box_var,
          output_box,
          variance,
          box_normalized,
          axis);
}
// End-to-end check of the MLU box_coder bridge against the CPU reference:
// builds the op with deterministic input data, runs box_coder_ref for the
// expected output, launches the op on the MLU, then compares element-wise
// (after transposing the MLU layout back to NCHW).
void test_box_coder(int row,
                    int col,
                    int len,
                    int axis,
                    cnmlBoxCodeType_t code_type,
                    bool box_normalized) {
  // prepare input&output variables
  Scope scope;
  std::string prior_box_var_name("PriorBox");
  std::string taget_box_var_name("TargetBox");
  std::string output_box_var_name("OutputBox");
  std::string box_var_var_name("PriorBoxVar");
  std::string output_ref_var_name("OutputBox_ref");
  auto *prior_box = scope.Var(prior_box_var_name)->GetMutable<Tensor>();
  auto *target_box = scope.Var(taget_box_var_name)->GetMutable<Tensor>();
  auto *box_var = scope.Var(box_var_var_name)->GetMutable<Tensor>();
  auto *output_box = scope.Var(output_box_var_name)->GetMutable<Tensor>();
  auto *output_box_ref = scope.Var(output_ref_var_name)->GetMutable<Tensor>();
  // Shapes depend on the code type (same conventions as the converter).
  if (code_type == cnmlBoxCodeType_t::Encode) {
    // target_box_shape = {row, len};
    // prior_box_shape = {col, len};
    // output_shape = {row, col, len};
    target_box->Resize({row, len});
    prior_box->Resize({col, len});
    box_var->Resize({col, len});
  } else if (code_type == cnmlBoxCodeType_t::Decode) {
    // target_box_shape = {row,col,len};
    // prior_box_shape = {col, len} if axis == 0, or {row, len};
    // output_shape = {row, col, len};
    target_box->Resize({row, col, len});
    if (axis == 0) {
      prior_box->Resize({col, len});
      box_var->Resize({col, len});
    } else if (axis == 1) {
      prior_box->Resize({row, len});
      box_var->Resize({row, len});
    } else {
      LOG(FATAL) << "axis should in {0,1} ,but got " << axis << std::endl;
    }
  }
  // initialize input&output data
  // FillTensor<float>(prior_box);
  // FillTensor<float>(target_box);
  // FillTensor<float, int>(box_var); // ??????
  // Deterministic patterns: boxes cycle 1..8, variances repeat
  // {0.1, 0.1, 0.2, 0.2} per box.
  for (int i = 0; i < prior_box->dims().production(); i++) {
    prior_box->mutable_data<float>()[i] = static_cast<float>((i % 8) + 1);
  }
  for (int i = 0; i < target_box->dims().production(); i++) {
    target_box->mutable_data<float>()[i] = static_cast<float>((i % 8) + 1);
  }
  for (int i = 0; i < box_var->dims().production() / 4; i++) {
    box_var->mutable_data<float>()[i * 4 + 0] = 0.1;
    box_var->mutable_data<float>()[i * 4 + 1] = 0.1;
    box_var->mutable_data<float>()[i * 4 + 2] = 0.2;
    box_var->mutable_data<float>()[i * 4 + 3] = 0.2;
  }
  LOG(INFO) << "prior_box count : " << prior_box->dims().production();
  LOG(INFO) << "target_box count : " << target_box->dims().production();
  LOG(INFO) << "box_var count : " << box_var->dims().production();
  // ToFile(*prior_box, "prior_box.txt");
  // ToFile(*box_var, "box_var.txt");
  // ToFile(*target_box, "target_box.txt");
  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType("box_coder");
  opdesc.SetInput("PriorBox", {prior_box_var_name});
  opdesc.SetInput("TargetBox", {taget_box_var_name});
  opdesc.SetInput("PriorBoxVar", {box_var_var_name});
  opdesc.SetOutput("OutputBox", {output_box_var_name});
  opdesc.SetAttr("axis", axis);
  opdesc.SetAttr("box_normalized", box_normalized);
  opdesc.SetAttr("code_type", BoxCodeTypeToStr(code_type));
  // trans inputs
  Tensor prior_box_trans;
  Tensor box_var_trans;
  Tensor target_box_trans;
  prior_box_trans.Resize(prior_box->dims());
  box_var_trans.Resize(box_var->dims());
  target_box_trans.Resize(target_box->dims());
  auto op = CreateOp<paddle::lite::operators::BoxCoderOpLite>(opdesc, &scope);
  // Run the CPU reference first (it writes into output_box) and stash the
  // result before LaunchOp overwrites output_box with the MLU result.
  box_coder_ref(op);
  output_box_ref->CopyDataFrom(*output_box);
  // transpose(prior_box->mutable_data<float>(),
  //           prior_box_trans.mutable_data<float>(),
  //           {static_cast<int>(prior_box->dims()[0]),
  //            static_cast<int>(prior_box->dims()[1]),
  //            1,
  //            1},
  //           {0, 2, 3, 1});
  // row col len 1 --> row len 1 col
  // Only the target box is transposed to the MLU-side layout; prior box and
  // variances are fed as-is (see the commented-out transposes above/below).
  transpose(target_box->mutable_data<float>(),
            target_box_trans.mutable_data<float>(),
            {
                static_cast<int>(target_box->dims()[0]),
                static_cast<int>(target_box->dims()[1]),
                static_cast<int>(target_box->dims()[2]),
                1,
            },
            {0, 2, 3, 1});
  // transpose(box_var->mutable_data<float>(),
  //           box_var_trans.mutable_data<float>(),
  //           {static_cast<int>(box_var->dims()[0]),
  //            static_cast<int>(box_var->dims()[0]),
  //            1,
  //            1},
  //           {0, 2, 3, 1});
  target_box->CopyDataFrom(target_box_trans);
  LaunchOp(op,
           {prior_box_var_name, taget_box_var_name, box_var_var_name},
           {output_box_var_name});
  // execute reference implementation and save to output tensor('out')
  // compare results
  auto *output_data = output_box->mutable_data<float>();
  auto *output_ref_data = output_box_ref->mutable_data<float>();
  Tensor output_trans;
  output_trans.Resize(output_box->dims());
  // row * len * 1 * col -> row * col * len * 1
  transpose(output_data,
            output_trans.mutable_data<float>(),
            {static_cast<int>(output_box->dims()[0]),
             static_cast<int>(output_box->dims()[2]),
             1,
             static_cast<int>(output_box->dims()[1])},
            {0, 3, 1, 2});
  output_data = output_trans.mutable_data<float>();
  // ToFile(*output_box, "output_mlu_before_trans.txt");
  // ToFile(&output_trans, "output_mlu.txt");
  // ToFile(output_box_ref, "output_cpu.txt");
  // Element-wise comparison with a loose tolerance (fp16 on the device).
  for (int i = 0; i < output_box->dims().production(); i++) {
    VLOG(6) << i;
    EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-2);
  }
}
// NOTE(review): despite the "prior_density_box" test name, this exercises the
// box_coder bridge (Decode mode, normalized boxes, axis 0).
TEST(MLUBridges, prior_density_box) {
  const int row = 1;
  const int col = 20560;
  const int len = 4;
  const int axis = 0;
  const cnmlBoxCodeType_t code_type = cnmlBoxCodeType_t::Decode;
  const bool box_normalized = true;
  test_box_coder(row, col, len, axis, code_type, box_normalized);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(box_coder, kMLU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Computes the density_prior_box output shapes from the feature map:
// boxes and variances are both {feat_w, feat_h, num_priors, 4}, where
// num_priors = sum over each density d of fixed_ratios.size() * d^2.
void inferShape(Tensor* input,
                Tensor* boxes,
                Tensor* variances,
                std::vector<float> fixed_ratios,
                std::vector<int> densities) {
  auto feat_height = input->dims()[2];
  auto feat_width = input->dims()[3];
  int num_priors = 0;
  for (auto density : densities) {
    num_priors += fixed_ratios.size() * pow(density, 2);
  }
  std::vector<int64_t> boxes_shape = {feat_width, feat_height, num_priors, 4};
  boxes->Resize(boxes_shape);
  variances->Resize(boxes_shape);
}
int DensityPriorBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[MLU] Converting " + op_type + "...";
auto input_name = op_info->Input("Input").front();
auto image_name = op_info->Input("Image").front();
auto boxes_name = op_info->Output("Boxes").front();
auto variances_name = op_info->Output("Variances").front();
auto input_var = scope->FindVar(input_name)->GetMutable<Tensor>();
auto image_var = scope->FindVar(image_name)->GetMutable<Tensor>();
auto boxes_var = scope->FindVar(boxes_name)->GetMutable<Tensor>();
auto variances_var = scope->FindVar(variances_name)->GetMutable<Tensor>();
auto clip = op_info->GetAttr<bool>("clip");
auto fixed_sizes = op_info->GetAttr<std::vector<float>>("fixed_sizes");
auto fixed_ratios = op_info->GetAttr<std::vector<float>>("fixed_ratios");
auto variances_ = op_info->GetAttr<std::vector<float>>("variances");
auto densities = op_info->GetAttr<std::vector<int>>("densities");
auto offset = op_info->GetAttr<float>("offset");
auto step_w = op_info->GetAttr<float>("step_w");
auto step_h = op_info->GetAttr<float>("step_h");
inferShape(input_var, boxes_var, variances_var, fixed_ratios, densities);
auto input_dims = input_var->dims();
auto image_dims = image_var->dims();
auto boxes_dims = boxes_var->dims();
auto variances_dims = variances_var->dims();
auto feat_tensor = graph->GetNode(input_name);
auto image_tensor = graph->GetNode(image_name);
auto boxes_tensor_trans = graph->AddNode(boxes_name + ".trans.boxes",
boxes_dims.Vectorize(),
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
auto variances_tensor_trans = graph->AddNode(variances_name + ".trans.vars",
variances_dims.Vectorize(),
CNML_TENSOR,
CNML_NHWC,
graph->FPType());
bool float32_precision = false;
if (graph->FPType() == CNML_DATA_FLOAT32) {
float32_precision = true;
}
// ==================== DEBUG ==================
VLOG(6) << "input_name: " << input_name;
VLOG(6) << "image_name: " << image_name;
VLOG(6) << "boxes_name: " << boxes_name;
VLOG(6) << "variances_name: " << variances_name;
VLOG(6) << "input_dims : " << input_dims;
VLOG(6) << "image_dims : " << image_dims;
VLOG(6) << "boxes_dims : " << boxes_dims;
VLOG(6) << "variances_dims : " << variances_dims;
VLOG(6) << "clip : " << clip;
VLOG(6) << "fixed_sizes : ";
for (auto tmp : fixed_sizes) {
VLOG(6) << tmp;
}
VLOG(6) << "fixed_ratios : ";
for (auto tmp : fixed_ratios) {
VLOG(6) << tmp;
}
VLOG(6) << "variances_ : ";
for (auto tmp : variances_) {
VLOG(6) << tmp;
}
VLOG(6) << "densities : ";
for (auto tmp : densities) {
VLOG(6) << tmp;
}
VLOG(6) << "offset : " << offset;
VLOG(6) << "clip : " << clip;
int cnml_boxes_shape[4];
CNML_CALL(
cnmlGetTensorShape(boxes_tensor_trans->mlu_tensor(), cnml_boxes_shape));
VLOG(6) << "cnml_boxes_shape";
for (size_t i = 0; i < 4; i++) {
VLOG(6) << cnml_boxes_shape[i];
}
int cnml_vars_shape[4];
VLOG(6) << "cnml_vars_shape";
CNML_CALL(cnmlGetTensorShape(variances_tensor_trans->mlu_tensor(),
cnml_vars_shape));
for (size_t i = 0; i < 4; i++) {
VLOG(6) << cnml_vars_shape[i];
}
int feat_width = input_dims[3];
int feat_height = input_dims[2];
int image_width = image_dims[3];
int image_height = image_dims[2];
// ==================== DEBUG END ==================
cnmlPluginDensityPriorBoxOpParam_t op_param;
cnmlCreatePluginDensityPriorBoxOpParam(&op_param,
feat_width,
feat_height,
image_width,
image_height,
variances_.data(),
variances_.size(),
densities.data(),
densities.size(),
fixed_sizes.data(),
fixed_sizes.size(),
fixed_ratios.data(),
fixed_ratios.size(),
clip,
step_w,
step_h,
offset,
float32_precision,
TargetWrapperMlu::MLUCoreVersion());
cnmlTensor_t input_tensors[2];
input_tensors[0] = feat_tensor->mlu_tensor();
input_tensors[1] = image_tensor->mlu_tensor();
cnmlTensor_t output_tensors[2];
output_tensors[0] = boxes_tensor_trans->mlu_tensor();
output_tensors[1] = variances_tensor_trans->mlu_tensor();
cnmlBaseOp_t density_prior_box_op;
CNML_CALL(cnmlCreatePluginDensityPriorBoxOp(
&density_prior_box_op, op_param, input_tensors, output_tensors));
std::vector<int> nchw_to_nhwc_axis = {0, 2, 3, 1};
// ============== Boxes Trans =======================
auto boxes_tensor = graph->AddNode(boxes_name,
boxes_dims.Vectorize(),
CNML_TENSOR,
CNML_NCHW,
graph->FPType());
cnmlBaseOp_t trans_boxes_op{nullptr};
cnmlNdTransposeOpParam_t trans_boxes_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans_boxes_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans_boxes_op,
boxes_tensor_trans->mlu_tensor(),
boxes_tensor->mlu_tensor(),
trans_boxes_param));
// ============== Boxes Trans End ===================
// ============== Vars Trans =======================
auto variances_tensor = graph->AddNode(variances_name,
variances_dims.Vectorize(),
CNML_TENSOR,
CNML_NCHW,
graph->FPType());
cnmlBaseOp_t trans_vars_op{nullptr};
cnmlNdTransposeOpParam_t trans_vars_param{nullptr};
CNML_CALL(cnmlCreateNdTransposeOpParam(
&trans_vars_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size()));
CNML_CALL(cnmlCreateNdTransposeProOp(&trans_vars_op,
variances_tensor_trans->mlu_tensor(),
variances_tensor->mlu_tensor(),
trans_vars_param));
// ============== Vars Trans End ===================
// cnmlSetOperationComputingLayout(density_prior_box_op,CNML_NCHW);
// cnmlSetTensorComputingLayoutInOperation(
// density_prior_box_op, boxes_tensor->mlu_tensor(), CNML_NCHW);
// cnmlSetTensorComputingLayoutInOperation(
// density_prior_box_op, variances_tensor->mlu_tensor(), CNML_NCHW);
graph->FuseOp(trans_boxes_op);
graph->FuseOp(density_prior_box_op);
graph->FuseOp(trans_vars_op);
// cnmlDestroyPluginDensityPriorBoxOpParam(&op_param);
return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(density_prior_box,
kMLU,
paddle::lite::subgraph::mlu::DensityPriorBoxConverter);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/density_prior_box_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Computes and applies the expected output shapes for density_prior_box:
// both Boxes and Variances get shape {feat_w, feat_h, num_priors, 4}, where
// num_priors = sum_i(density_i^2) * |fixed_ratios|.
// NOTE(review): the first two dims are ordered {W, H, ...} while the CPU
// reference iterates H-major; current tests only use square feature maps,
// so verify the order before using rectangular inputs.
void inferShape_(Tensor* input,
                 Tensor* boxes,
                 Tensor* variances,
                 std::vector<float> fixed_ratios,
                 std::vector<int> densities) {
  const auto feat_height = input->dims()[2];
  const auto feat_width = input->dims()[3];
  // Each density d contributes d*d anchor centers, one box per fixed ratio.
  int num_priors = 0;
  for (const auto density : densities) {
    num_priors += fixed_ratios.size() * density * density;
  }
  const std::vector<int64_t> out_shape = {
      feat_width, feat_height, num_priors, 4};
  boxes->Resize(out_shape);
  variances->Resize(out_shape);
}
// CPU reference implementation of density_prior_box, used to validate the
// MLU kernel. Reads inputs and attributes through the op's scope and writes
// the expected boxes/variances directly into the op's OUTPUT tensors (the
// caller is expected to copy them aside before launching the MLU op).
// Box layout written here is [h][w][prior][4] (xmin, ymin, xmax, ymax),
// normalized by the image size.
void prior_density_box_ref(
    const std::shared_ptr<operators::DensityPriorBoxOpLite> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto input =
      scope->FindVar(op_info->Input("Input").front())->GetMutable<Tensor>();
  auto image =
      scope->FindVar(op_info->Input("Image").front())->GetMutable<Tensor>();
  auto boxes_tensor =
      scope->FindVar(op_info->Output("Boxes").front())->GetMutable<Tensor>();
  auto variances = scope->FindVar(op_info->Output("Variances").front())
                       ->GetMutable<Tensor>();
  auto clip = op_info->GetAttr<bool>("clip");
  auto fixed_sizes = op_info->GetAttr<std::vector<float>>("fixed_sizes");
  auto fixed_ratios = op_info->GetAttr<std::vector<float>>("fixed_ratios");
  auto variances_ = op_info->GetAttr<std::vector<float>>("variances");
  auto densities = op_info->GetAttr<std::vector<int>>("densities");
  auto offset = op_info->GetAttr<float>("offset");
  auto step_w = op_info->GetAttr<float>("step_w");
  auto step_h = op_info->GetAttr<float>("step_h");
  // NOTE(review): these two locals are never read — leftover debris.
  std::vector<int> input_shape = {128, 128};
  std::vector<int> image_shape = {256, 256};
  // Total priors per spatial location: sum_i(density_i^2) * |fixed_ratios|.
  int num_priors = 0;
  for (size_t i = 0; i < densities.size(); ++i) {
    num_priors += (fixed_ratios.size()) * (pow(densities[i], 2));
  }
  int boxes_count = boxes_tensor->dims().production();
  float* boxes = boxes_tensor->mutable_data<float>();
  float* vars = variances->mutable_data<float>();
  auto img_width = image->dims()[3];
  auto img_height = image->dims()[2];
  auto feature_width = input->dims()[3];
  auto feature_height = input->dims()[2];
  // A zero step means "derive the stride from the image/feature-map ratio".
  float step_width, step_height;
  if (step_w == 0 || step_h == 0) {
    step_width = static_cast<float>(img_width) / feature_width;
    step_height = static_cast<float>(img_height) / feature_height;
  } else {
    step_width = step_w;
    step_height = step_h;
  }
  int step_average = static_cast<int>((step_width + step_height) * 0.5);
  // Pre-compute sqrt(ratio); width scales by sqrt(r), height by 1/sqrt(r),
  // so width/height == r while the area stays fixed_size^2.
  std::vector<float> sqrt_fixed_ratios;
  for (size_t i = 0; i < fixed_ratios.size(); i++) {
    sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i]));
  }
  for (int h = 0; h < feature_height; ++h) {
    for (int w = 0; w < feature_width; ++w) {
      // Anchor center of this feature-map cell in image coordinates.
      float center_x = (w + offset) * step_width;
      float center_y = (h + offset) * step_height;
      int idx = 0;
      // Generate density prior boxes with fixed sizes.
      for (size_t s = 0; s < fixed_sizes.size(); ++s) {
        auto fixed_size = fixed_sizes[s];
        int density = densities[s];
        // density*density sub-centers, spaced `shift` apart within the cell.
        int shift = step_average / density;
        // Generate density prior boxes with fixed ratios.
        for (size_t r = 0; r < fixed_ratios.size(); ++r) {
          float box_width_ratio = fixed_size * sqrt_fixed_ratios[r];
          float box_height_ratio = fixed_size / sqrt_fixed_ratios[r];
          // Top-left sub-center of the density grid.
          float density_center_x = center_x - step_average / 2. + shift / 2.;
          float density_center_y = center_y - step_average / 2. + shift / 2.;
          for (int di = 0; di < density; ++di) {
            for (int dj = 0; dj < density; ++dj) {
              float center_x_temp = density_center_x + dj * shift;
              float center_y_temp = density_center_y + di * shift;
              // Corners are normalized by image size and clamped to [0, 1].
              boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 +
                    idx * 4 + 0] =
                  std::max((center_x_temp - box_width_ratio / 2.) / img_width,
                           0.);
              boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 +
                    idx * 4 + 1] =
                  std::max((center_y_temp - box_height_ratio / 2.) / img_height,
                           0.);
              boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 +
                    idx * 4 + 2] =
                  std::min((center_x_temp + box_width_ratio / 2.) / img_width,
                           1.);
              boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 +
                    idx * 4 + 3] =
                  std::min((center_y_temp + box_height_ratio / 2.) / img_height,
                           1.);
              idx++;
            }
          }
        }
      }
    }
  }
  // Optional global clip to [0, 1] (corners are already clamped above; this
  // mirrors the paddle op's `clip` attribute).
  if (clip) {
    std::transform(boxes, boxes + boxes_count, boxes, [](float v) -> float {
      return std::min<float>(std::max<float>(v, 0.), 1.);
    });
  }
  // Variances are broadcast per box: every box gets the same 4 values.
  int box_num = feature_height * feature_width * num_priors;
  for (int i = 0; i < box_num; ++i) {
    for (size_t j = 0; j < variances_.size(); ++j) {
      vars[i * variances_.size() + j] = variances_[j];
    }
  }
}
// End-to-end test driver: builds a density_prior_box op, computes the CPU
// reference, runs the MLU bridge, transposes the MLU output back from NHWC
// to NCHW and compares against the reference element-wise.
// NOTE(review): the trailing parameters are (step_w, step_h, offset) — keep
// call sites in that exact order.
void test_prior_density_box(int feat_h,
                            int feat_w,
                            int img_h,
                            int img_w,
                            bool clip,
                            std::vector<float> fixed_sizes,
                            std::vector<float> fixed_ratios,
                            std::vector<float> variances_,
                            std::vector<int> densities,
                            float step_w,
                            float step_h,
                            float offset) {
  // prepare input&output variables
  Scope scope;
  std::string input_var_name("Input");
  std::string image_var_name("Image");
  std::string boxes_var_name("Boxes");
  std::string variances_var_name("Variances");
  std::string boxes_ref_var_name("Boxes_ref");
  std::string variances_ref_var_name("Variances_ref");
  auto* input = scope.Var(input_var_name)->GetMutable<Tensor>();
  auto* image = scope.Var(image_var_name)->GetMutable<Tensor>();
  auto* boxes = scope.Var(boxes_var_name)->GetMutable<Tensor>();
  auto* variances = scope.Var(variances_var_name)->GetMutable<Tensor>();
  auto* boxes_ref = scope.Var(boxes_ref_var_name)->GetMutable<Tensor>();
  auto* variances_ref = scope.Var(variances_ref_var_name)->GetMutable<Tensor>();
  // Only the spatial dims matter for prior boxes; batch/channel are 1.
  input->Resize({1, 1, feat_h, feat_w});
  image->Resize({1, 1, img_h, img_w});
  // initialize input&output data (values are irrelevant — only shapes are
  // consumed by the prior-box computation)
  FillTensor<float>(input);
  FillTensor<float, int>(image);
  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType("density_prior_box");
  opdesc.SetInput("Input", {input_var_name});
  opdesc.SetInput("Image", {image_var_name});
  opdesc.SetOutput("Boxes", {boxes_var_name});
  opdesc.SetOutput("Variances", {variances_var_name});
  opdesc.SetAttr("fixed_sizes", fixed_sizes);
  opdesc.SetAttr("fixed_ratios", fixed_ratios);
  opdesc.SetAttr("variances", variances_);
  opdesc.SetAttr("densities", densities);
  opdesc.SetAttr("offset", offset);
  opdesc.SetAttr("clip", clip);
  opdesc.SetAttr("step_w", step_w);
  opdesc.SetAttr("step_h", step_h);
  inferShape_(input, boxes, variances, fixed_ratios, densities);
  inferShape_(input, boxes_ref, variances_ref, fixed_ratios, densities);
  auto op = CreateOp<operators::DensityPriorBoxOpLite>(opdesc, &scope);
  // The reference writes into the op's OUTPUT tensors (boxes/variances);
  // stash those results into *_ref before LaunchOp overwrites them.
  prior_density_box_ref(op);
  boxes_ref->CopyDataFrom(*boxes);
  variances_ref->CopyDataFrom(*variances);
  LaunchOp(op,
           {input_var_name, image_var_name},
           {boxes_var_name, variances_var_name});
  // execute reference implementation and save to output tensor('out')
  // ===================== Trans From NHWC to NCHW ====================
  // The MLU path produces NHWC-ordered data; rearrange to NCHW so it is
  // directly comparable with the CPU reference.
  Tensor boxes_trans;
  boxes_trans.Resize(boxes->dims().Vectorize());
  transpose(boxes->mutable_data<float>(),
            boxes_trans.mutable_data<float>(),
            {static_cast<int>(boxes->dims()[0]),
             static_cast<int>(boxes->dims()[2]),
             static_cast<int>(boxes->dims()[3]),
             static_cast<int>(boxes->dims()[1])},
            {0, 3, 1, 2});
  boxes->CopyDataFrom(boxes_trans);
  Tensor vars_trans;
  vars_trans.Resize(variances->dims().Vectorize());
  transpose(variances->mutable_data<float>(),
            vars_trans.mutable_data<float>(),
            {static_cast<int>(variances->dims()[0]),
             static_cast<int>(variances->dims()[2]),
             static_cast<int>(variances->dims()[3]),
             static_cast<int>(variances->dims()[1])},
            {0, 3, 1, 2});
  variances->CopyDataFrom(vars_trans);
  // compare results
  auto* boxes_data = boxes->mutable_data<float>();
  auto* boxes_ref_data = boxes_ref->mutable_data<float>();
  auto* variances_data = variances->mutable_data<float>();
  auto* variances_ref_data = variances_ref->mutable_data<float>();
  for (int i = 0; i < variances->dims().production(); i++) {
    VLOG(6) << i;
    EXPECT_NEAR(variances_data[i], variances_ref_data[i], 1e-5);
  }
  for (int i = 0; i < boxes->dims().production(); i++) {
    VLOG(6) << i;
    EXPECT_NEAR(boxes_data[i], boxes_ref_data[i], 1e-5);
  }
}
// Smoke test for the MLU density_prior_box bridge on a square 16x16 feature
// map over a 32x32 image, one prior per ratio (all densities == 1).
TEST(MLUBridges, prior_density_box) {
  std::vector<int> input_shape = {16, 16};
  std::vector<int> image_shape = {32, 32};
  std::vector<float> fixed_sizes = {8, 16, 32};
  std::vector<float> fixed_ratios = {0.5, 1, 2};
  std::vector<int> densities = {1, 1, 1};
  std::vector<float> variances = {0.1, 0.1, 0.2, 0.2};
  bool clip = true;
  float offset = 0.5;
  float step_h = 0;
  float step_w = 0;
  // BUGFIX: the trailing arguments were previously passed as
  // (offset, step_h, step_w), binding offset=0.5 to step_w and 0 to offset.
  // The signature of test_prior_density_box ends (step_w, step_h, offset).
  test_prior_density_box(input_shape[1],
                         input_shape[0],
                         image_shape[1],
                         image_shape[0],
                         clip,
                         fixed_sizes,
                         fixed_ratios,
                         variances,
                         densities,
                         step_w,
                         step_h,
                         offset);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(density_prior_box, kMLU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/multiclass_nms_api.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/operators/multiclass_nms_op.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Converts paddle's multiclass_nms op into a CNML plugin kernel invocation.
// The plugin kernel computes in the CPU element order while graph tensors
// flow in MLU (NHWC-style) order, so NdTranspose ops are inserted around the
// plugin for both inputs and the boxed output. A const "workspace" tensor is
// bound to give the kernel scratch memory.
int MulticlassNmsConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[MLU] Converting " + op_type + "...";
  auto bboxes_name = op_info->Input("BBoxes").front();
  auto scores_name = op_info->Input("Scores").front();
  auto out_name = op_info->Output("Out").front();
  auto* bboxes = scope->FindTensor(bboxes_name);
  auto* scores = scope->FindTensor(scores_name);
  auto* out = scope->FindTensor(out_name);
  auto background_label = op_info->GetAttr<int>("background_label");
  auto keep_top_k = op_info->GetAttr<int>("keep_top_k");
  auto nms_top_k = op_info->GetAttr<int>("nms_top_k");
  auto score_threshold = op_info->GetAttr<float>("score_threshold");
  auto nms_threshold = op_info->GetAttr<float>("nms_threshold");
  auto nms_eta = op_info->GetAttr<float>("nms_eta");
  // "normalized" is optional in older models; default false when absent.
  bool normalized = false;
  if (op_info->HasAttr("normalized")) {
    normalized = op_info->GetAttr<bool>("normalized");
  }
  auto bboxes_dims = bboxes->dims();
  auto scores_dims = scores->dims();
  auto batch_size = bboxes->dims()[0];
  auto num_boxes = bboxes->dims()[1];
  auto class_num = scores->dims()[1];
  // keep_top_k == -1 means "keep everything" -> bound by input box count.
  keep_top_k = keep_top_k == -1 ? num_boxes : keep_top_k;
  // NOTE(review): box_size is hard-coded to 4 corner coordinates; each
  // output row is [label, score, xmin, ymin, xmax, ymax] (box_size + 2).
  int box_size = 4;
  std::vector<int64_t> outs_shape = {batch_size, keep_top_k, box_size + 2};
  // Out must be resized here since its shape depends on the resolved
  // keep_top_k; the scope hands back a const pointer, hence the const_cast.
  const_cast<Tensor*>(out)->Resize(outs_shape);
  auto out_dims = out->dims();
  int core_num = TargetWrapperMlu::MLUCoreNumber();
  // expect {batch_size, num_boxes, box_size} in compute
  // while {batch_size, box_size,num_boxes} on mlu
  // while {batch_size, num_boxes, box_size} on cpu
  // so mlu data_flow and mlu compute layout mismatch, should set bboxes_tensor
  // as NCHW
  auto bboxes_tensor = graph->GetNode(bboxes_name);
  // expect {batch_size, class_num, num_boxes} in compute
  // while {batch_size, num_boxes,class_num } on mlu
  // while {batch_size, class_num, num_boxes} on cpu
  // so mlu data_flow and mlu compute layout mismatch, should set scores_tensor
  // as NCHW
  auto scores_tensor = graph->GetNode(scores_name);
  // expect batch_size, keep_top_k, box_size + 2 in compute
  // while batch_size, box_size + 2, keep_top_k on mlu
  // while batch_size, keep_top_k, box_size + 2 on cpu
  // so mlu data_flow and mlu compute layout mismatch, should set out_tensor as
  auto out_tensor = graph->AddNode(
      out_name, out_dims.Vectorize(), CNML_TENSOR, CNML_NCHW, graph->FPType());
  // trans bboxes {batch_size, num_boxes, box_size}
  auto bboxes_trans_tensor = graph->AddNode(bboxes_name + ".trans.bboxes",
                                            bboxes_dims.Vectorize(),
                                            CNML_TENSOR,
                                            CNML_NCHW,
                                            graph->FPType(),
                                            CNML_NCHW);
  // trans scores {batch_size, class_num, num_boxes}
  auto scores_trans_tensor = graph->AddNode(bboxes_name + ".trans.scores",
                                            scores_dims.Vectorize(),
                                            CNML_TENSOR,
                                            CNML_NCHW,
                                            graph->FPType(),
                                            CNML_NCHW);
  // trans out {batch_size, keep_top_k, box_size + 2}
  auto out_trans_tensor = graph->AddNode(out_name + ".trans.out",
                                         out_dims.Vectorize(),
                                         CNML_TENSOR,
                                         CNML_NCHW,
                                         graph->FPType(),
                                         CNML_NCHW);
  // Second kernel output: number of detections kept per batch item.
  std::string out_num_name = "nms_out_num";
  auto* out_num = scope->NewTensor(out_num_name);
  std::vector<int64_t> out_num_shape = {batch_size, 1};
  out_num->Resize(out_num_shape);
  auto num_outs_tensor = graph->AddNode(
      out_num_name, out_num_shape, CNML_TENSOR, CNML_NCHW, graph->FPType());
  bool float_precision = false;
  if (graph->FPType() == CNML_DATA_FLOAT32) {
    float_precision = true;
  }
  // Workspace sizing: bytes for min(batch, cores) parallel lanes, then
  // converted to an element count for the const tensor below.
  // NOTE(review): the 14*num_boxes + 8*class_num*num_boxes factors come from
  // the plugin kernel's internal buffers — confirm against the kernel source
  // when upgrading cnplugin.
  int64_t workspace_mem_size =
      4 * std::min(static_cast<int>(batch_size), core_num) *
      (14 * num_boxes + 8 * class_num * num_boxes);
  int64_t workspace_fp_size = workspace_mem_size / 4;
  if (!float_precision) {
    // when run as fp16, mlu size will be half of cpu size, so workspace_fp_size
    // should be double
    workspace_fp_size = workspace_mem_size / 2;
  }
  std::vector<int64_t> workspace_shape = {workspace_fp_size};
  std::string nms_workspace_name =
      "nms_workspace";  // expect only one nms in same model
  auto workspace_tensor = graph->AddNode(nms_workspace_name,
                                         workspace_shape,
                                         CNML_CONST,
                                         CNML_NCHW,
                                         graph->FPType());
  // Host-side zero-fill backing the const workspace tensor.
  std::vector<float> workspace_cpu(workspace_shape[0]);
  VLOG(6) << "workspace_shape :" << workspace_shape[0];
  // =================== Bboxes Trans ============================
  // {batch, box_size, num_boxes} -> {batch, num_boxes, box_size}
  std::vector<int> bboxes_axis = {0, 2, 1};
  cnmlBaseOp_t bboxes_trans_op{nullptr};
  cnmlNdTransposeOpParam_t bboxes_trans_param{nullptr};
  CNML_CALL(cnmlCreateNdTransposeOpParam(
      &bboxes_trans_param, bboxes_axis.data(), bboxes_axis.size()));
  CNML_CALL(cnmlCreateNdTransposeProOp(&bboxes_trans_op,
                                       bboxes_tensor->mlu_tensor(),
                                       bboxes_trans_tensor->mlu_tensor(),
                                       bboxes_trans_param));
  // =================== Bboxes Trans END ========================
  // =================== Scores Trans ============================
  // {batch, num_boxes, class_num} -> {batch, class_num, num_boxes}
  std::vector<int> scores_axis = {0, 2, 1};
  cnmlBaseOp_t scores_trans_op{nullptr};
  cnmlNdTransposeOpParam_t scores_trans_param{nullptr};
  CNML_CALL(cnmlCreateNdTransposeOpParam(
      &scores_trans_param, scores_axis.data(), scores_axis.size()));
  CNML_CALL(cnmlCreateNdTransposeProOp(&scores_trans_op,
                                       scores_tensor->mlu_tensor(),
                                       scores_trans_tensor->mlu_tensor(),
                                       scores_trans_param));
  // =================== Scores Trans END ========================
  multiclass_nms_param_t params_;
  create_multiclass_nms_param(&params_,
                              score_threshold,
                              nms_top_k,
                              keep_top_k,
                              nms_threshold,
                              normalized,
                              nms_eta,
                              background_label,
                              batch_size,
                              class_num,
                              num_boxes,
                              box_size);
  cnmlBaseOp_t multiclass_nms_op;
  create_multiclass_nms_op(&multiclass_nms_op,
                           params_,
                           bboxes_trans_tensor->mlu_tensor(),
                           scores_trans_tensor->mlu_tensor(),
                           out_trans_tensor->mlu_tensor(),
                           num_outs_tensor->mlu_tensor(),
                           workspace_tensor->mlu_tensor(),
                           float_precision);
  graph->BindConstRawData(
      nms_workspace_name, workspace_cpu.data(), workspace_cpu.size(), true);
  // =================== Out Trans ============================
  // {batch, box_size + 2, keep_top_k} -> {batch, keep_top_k, box_size + 2}
  std::vector<int> out_axis = {0, 2, 1};
  cnmlBaseOp_t out_trans_op{nullptr};
  cnmlNdTransposeOpParam_t out_trans_param{nullptr};
  CNML_CALL(cnmlCreateNdTransposeOpParam(
      &out_trans_param, out_axis.data(), out_axis.size()));
  CNML_CALL(cnmlCreateNdTransposeProOp(&out_trans_op,
                                       out_trans_tensor->mlu_tensor(),
                                       out_tensor->mlu_tensor(),
                                       out_trans_param));
  // =================== Out Trans END ========================
  // =================== DEBUG ====================
  VLOG(6) << "bboxes_name: " << bboxes_name;
  VLOG(6) << "scores_name: " << scores_name;
  VLOG(6) << "out_name: " << out_name;
  VLOG(6) << "background_label: " << background_label;
  VLOG(6) << "keep_top_k: " << keep_top_k;
  VLOG(6) << "nms_top_k: " << nms_top_k;
  VLOG(6) << "score_threshold: " << score_threshold;
  VLOG(6) << "nms_threshold: " << nms_threshold;
  VLOG(6) << "nms_eta: " << nms_eta;
  VLOG(6) << "normalized: " << normalized;
  VLOG(6) << "bboxes_dims: " << bboxes_dims;
  VLOG(6) << "scores_dims: " << scores_dims;
  VLOG(6) << "out_dims: " << out_dims;
  VLOG(6) << "out_dims: " << out->dims();
  VLOG(6) << "batch_size: " << batch_size;
  VLOG(6) << "num_boxes : " << num_boxes;
  VLOG(6) << "class_num: " << class_num;
  // =================== DEBUG END ================
  graph->FuseOp(bboxes_trans_op);
  graph->FuseOp(scores_trans_op);
  graph->FuseOp(multiclass_nms_op);
  graph->FuseOp(out_trans_op);
  return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(multiclass_nms,
kMLU,
paddle::lite::subgraph::mlu::MulticlassNmsConverter);
// Copyright (c) 2020 smarsu. All Rights Reserved.
#include "lite/kernels/mlu/bridges/multiclass_nms_api.h"
#include <cnml.h>
#include <cnrt.h>
#include <stdlib.h>
#include <fstream>
#include <iostream>
#include <vector>
// Entry point of the multiclass_nms BANG plugin kernel, implemented outside
// this translation unit (C linkage). It is passed to cnmlCreatePluginOp in
// create_multiclass_nms_op below.
// NOTE(review): this parameter order must stay in sync with the order in
// which create_multiclass_nms_op marks/adds entries to the
// cnrtKernelParamsBuffer — verify both sides when changing either.
extern "C" {
void multiclass_nms_paddle_entry(void *bboxes,
                                 void *scores,
                                 void *outs,
                                 void *num_outs,
                                 float score_threshold,
                                 int nms_top_k,
                                 int keep_top_k,
                                 float nms_threshold,
                                 bool normalized,
                                 float nms_eta,
                                 int background_label,
                                 int batch_size,
                                 int class_num,
                                 int num_boxes,
                                 int box_size,
                                 void *work_space,
                                 DataType data_type);
}  // extern "C"
// Heap-allocates a multiclass_nms_param and copies every NMS attribute into
// it. Ownership passes to the caller via *params_ptr; release with
// destory_multiclass_nms_param().
void create_multiclass_nms_param(multiclass_nms_param_t *params_ptr,
                                 float score_threshold,
                                 int nms_top_k,
                                 int keep_top_k,
                                 float nms_threshold,
                                 bool normalized,
                                 float nms_eta,
                                 int background_label,
                                 int batch_size,
                                 int class_num,
                                 int num_boxes,
                                 int box_size) {
  auto p = reinterpret_cast<multiclass_nms_param_t>(
      malloc(sizeof(struct multiclass_nms_param)));
  p->score_threshold = score_threshold;
  p->nms_top_k = nms_top_k;
  p->keep_top_k = keep_top_k;
  p->nms_threshold = nms_threshold;
  p->normalized = normalized;
  p->nms_eta = nms_eta;
  p->background_label = background_label;
  p->batch_size = batch_size;
  p->class_num = class_num;
  p->num_boxes = num_boxes;
  p->box_size = box_size;
  *params_ptr = p;
}
// Releases a param struct created by create_multiclass_nms_param.
// BUGFIX: also nulls the caller's handle so the pointer cannot dangle and a
// repeated call becomes a safe no-op (previously it double-freed).
void destory_multiclass_nms_param(multiclass_nms_param_t *params) {
  if (*params != NULL) {
    free(*params);
    *params = NULL;
  }
}
// Builds the CNML plugin op wrapping multiclass_nms_paddle_entry.
// Marks 2 inputs (bboxes, scores), 2 outputs (outs, num_outs) and 1 static
// tensor (workspace), then appends the scalar kernel arguments. The order of
// marks/adds here must mirror the parameter order of
// multiclass_nms_paddle_entry exactly.
// NOTE(review): the cnrt/cnml return codes are not checked here, unlike the
// CNML_CALL-wrapped calls elsewhere in this file — consider wrapping them.
int create_multiclass_nms_op(cnmlBaseOp_t *op_ptr,
                             multiclass_nms_param_t nms_param,
                             cnmlTensor_t bboxes,
                             cnmlTensor_t scores,
                             cnmlTensor_t outs,
                             cnmlTensor_t num_outs,
                             cnmlTensor_t workspace_tensor,
                             bool float_precision) {
  DataType data_type = kFloat16;
  if (float_precision) {
    data_type = kFloat32;
  }
  // Mutates the shared param struct: resolve keep_top_k == -1 ("keep all")
  // to the total number of boxes.
  if (nms_param->keep_top_k == -1) {
    nms_param->keep_top_k = nms_param->num_boxes;
  }
  cnrtKernelParamsBuffer_t params;
  cnrtGetKernelParamsBuffer(&params);
  // Two input tensors, two output tensors (positional placeholders).
  cnrtKernelParamsBufferMarkInput(params);
  cnrtKernelParamsBufferMarkInput(params);
  cnrtKernelParamsBufferMarkOutput(params);
  cnrtKernelParamsBufferMarkOutput(params);
  // Scalar arguments, in kernel-signature order.
  cnrtKernelParamsBufferAddParam(
      params, &nms_param->score_threshold, sizeof(float));
  cnrtKernelParamsBufferAddParam(params, &nms_param->nms_top_k, sizeof(int));
  cnrtKernelParamsBufferAddParam(params, &nms_param->keep_top_k, sizeof(int));
  cnrtKernelParamsBufferAddParam(
      params, &nms_param->nms_threshold, sizeof(float));
  cnrtKernelParamsBufferAddParam(params, &nms_param->normalized, sizeof(bool));
  cnrtKernelParamsBufferAddParam(params, &nms_param->nms_eta, sizeof(float));
  cnrtKernelParamsBufferAddParam(
      params, &nms_param->background_label, sizeof(int));
  cnrtKernelParamsBufferAddParam(params, &nms_param->batch_size, sizeof(int));
  cnrtKernelParamsBufferAddParam(params, &nms_param->class_num, sizeof(int));
  cnrtKernelParamsBufferAddParam(params, &nms_param->num_boxes, sizeof(int));
  cnrtKernelParamsBufferAddParam(params, &nms_param->box_size, sizeof(int));
  // The workspace is passed as a static tensor rather than a raw pointer.
  cnrtKernelParamsBufferMarkStatic(params);
  cnrtKernelParamsBufferAddParam(params, &data_type, sizeof(DataType));
  cnmlTensor_t input_tensors[2];
  input_tensors[0] = bboxes;
  input_tensors[1] = scores;
  cnmlTensor_t output_tensors[2];
  output_tensors[0] = outs;
  output_tensors[1] = num_outs;
  cnmlTensor_t static_tensors[1];
  static_tensors[0] = workspace_tensor;
  cnmlCreatePluginOp(op_ptr,
                     "multiclass_nms_paddle",
                     reinterpret_cast<void *>(multiclass_nms_paddle_entry),
                     params,
                     input_tensors,
                     2,
                     output_tensors,
                     2,
                     static_tensors,
                     1);
  // The params buffer is copied by cnmlCreatePluginOp; safe to destroy here.
  cnrtDestroyKernelParamsBuffer(params);
  return 0;
}
// Copyright (c) 2020 smarsu. All Rights Reserved.
#ifndef LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_
#define LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_
// #define ALIGN_UP(a, b) (((a) + (b) - 1) / (b) * (b))
// #define ALIGN_DN(a, b) ((a) / (b) * (b))
// #define DIV_UP(a, b) (((a) + (b) - 1) / (b))
// #define DIV_DN(a, b) ((a) / (b))
// #define MAX(a, b) ((a) >= (b) ? (a) : (b))
// #define MIN(a, b) ((a) <= (b) ? (a) : (b))
// #define ABS(a) (((a) > 0) ? (a) : (-(a)))
// #define INIFITE 0x7F800000
#include <cnml.h>
#include <cnrt.h>
// Element type tag passed through the kernel params buffer to the plugin
// kernel (only kFloat32/kFloat16 are produced by this bridge).
enum DataType {
  kInvalid,
  kFloat32,
  kFloat16,
  kUint8,
  kInt8,
  kInt16,
  kInt32,
};
// Strategy hint for splitting top-k work.
// NOTE(review): not referenced by the code visible in this file — presumably
// consumed by other cnplugin wrappers; confirm before removing.
enum TopkSplitStrategy {
  kAuto,
  kSplitN,
  kSplitC,
};
// Image channel layouts.
// NOTE(review): also unreferenced in this file — likely shared plugin API.
enum ColorType {
  kGray,
  kRGB,
  kBGR,
  kRGBA,
};
// Attribute bundle for the multiclass_nms plugin kernel; filled from the
// paddle op's attributes by create_multiclass_nms_param and consumed by
// create_multiclass_nms_op when packing the kernel params buffer.
struct multiclass_nms_param {
  float score_threshold;  // drop detections scored at or below this
  int nms_top_k;          // pre-NMS candidate cap per class
  int keep_top_k;         // post-NMS cap (-1 resolved to num_boxes)
  float nms_threshold;    // IoU threshold for suppression
  bool normalized;        // whether box coords are in [0, 1]
  float nms_eta;          // adaptive-threshold decay factor
  int background_label;   // class index to skip
  int batch_size;
  int class_num;
  int num_boxes;
  int box_size;  // coordinates per box (4 in this bridge)
};
// Heap-allocated handle type used across the C-style API below.
typedef struct multiclass_nms_param *multiclass_nms_param_t;
// Allocates a multiclass_nms_param on the heap, fills it with the given NMS
// attributes and returns it via *params_ptr (caller owns it).
void create_multiclass_nms_param(multiclass_nms_param_t *params_ptr,
                                 float score_threshold,
                                 int nms_top_k,
                                 int keep_top_k,
                                 float nms_threshold,
                                 bool normalized,
                                 float nms_eta,
                                 int background_label,
                                 int batch_size,
                                 int class_num,
                                 int num_boxes,
                                 int box_size);
// Frees a param struct created by create_multiclass_nms_param.
// (Name keeps the historical "destory" spelling for ABI/source compat.)
void destory_multiclass_nms_param(multiclass_nms_param_t *params);
// Creates the CNML plugin op wrapping the multiclass_nms BANG kernel.
// `workspace_tensor` is bound as a static (const) scratch tensor; returns 0.
int create_multiclass_nms_op(cnmlBaseOp_t *op_ptr,
                             multiclass_nms_param_t nms_param,
                             cnmlTensor_t bboxes,
                             cnmlTensor_t scores,
                             cnmlTensor_t outs,
                             cnmlTensor_t num_outs,
                             cnmlTensor_t workspace_tensor,
                             bool float_precision);
#endif // LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/multiclass_nms_op.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
// Generates `box_num` pseudo-random boxes as a flat [xmin, ymin, xmax, ymax]
// stream, clamped to the image rectangle. The seed is fixed, so the output
// is fully deterministic across calls.
std::vector<float> gen_random_boxes(int box_num, int img_w, int img_h) {
  unsigned int seed = 1;  // fixed seed -> reproducible test data
  auto next_uniform = [&seed](int scale) {
    return rand_r(&seed) / static_cast<double>(RAND_MAX) * scale;
  };
  std::vector<float> boxes;
  boxes.reserve(static_cast<size_t>(box_num) * 4);
  for (int i = 0; i < box_num; ++i) {
    // Sampling order (x, w, y, h) is significant for reproducibility.
    const float x = next_uniform(img_w);
    const float w = next_uniform(img_w);
    const float y = next_uniform(img_h);
    const float h = next_uniform(img_h);
    // Corners are (center±extent)/2, clamped into the image.
    boxes.push_back(std::max(0.0f, (x - w) / 2));
    boxes.push_back(std::max(0.0f, (y - h) / 2));
    boxes.push_back(std::min(static_cast<float>(img_w), (x + w) / 2));
    boxes.push_back(std::min(static_cast<float>(img_h), (y + h) / 2));
  }
  return boxes;
}
// Generates box_num * class_num pseudo-random scores in [0, 1], row-major by
// box. Deterministic (fixed seed).
std::vector<float> gen_random_scores(int box_num, int class_num) {
  std::vector<float> scores;
  scores.reserve(static_cast<size_t>(box_num) * class_num);
  unsigned int seed = 1;
  for (int i = 0; i < box_num; i++) {
    // BUGFIX: the inner loop previously shadowed the outer index `i` (and
    // compared unsigned size_t against signed int); use a distinct index.
    for (int j = 0; j < class_num; j++) {
      scores.push_back(rand_r(&seed) / static_cast<double>(RAND_MAX));
    }
  }
  return scores;
}
// Area of an [xmin, ymin, xmax, ymax] box. Aborts (CHECK) on degenerate or
// inverted boxes, so only strictly positive areas are returned.
float Area(float box[4]) {
  const float xmin = box[0];
  const float ymin = box[1];
  const float xmax = box[2];
  const float ymax = box[3];
  CHECK(xmax > xmin) << "xmax: " << xmax << " xmin: " << xmin;
  CHECK(ymax > ymin) << "ymax: " << ymax << " ymin: " << ymin;
  return (xmax - xmin) * (ymax - ymin);
}
// 1-D overlap of intervals [min1, max1] and [min2, max2]. Negative when the
// intervals are disjoint (the result is then minus the gap size).
float overlap(float min1, float max1, float min2, float max2) {
  const float total_span = std::max(max2, max1) - std::min(min1, min2);
  return ((max1 - min1) + (max2 - min2)) - total_span;
}
// Intersection area of two [xmin, ymin, xmax, ymax] boxes; 0 when disjoint.
// BUGFIX: each 1-D overlap is clamped at 0 BEFORE multiplying. Previously,
// boxes disjoint in both axes produced two negative overlaps whose product
// was a spurious positive "intersection" that survived the final max().
// (The 1-D overlap formula is inlined from overlap() above.)
float IntersectionArea(float box1[4], float box2[4]) {
  const float x_overlap =
      std::max(0.0f,
               (box1[2] - box1[0]) + (box2[2] - box2[0]) -
                   (std::max(box2[2], box1[2]) - std::min(box1[0], box2[0])));
  const float y_overlap =
      std::max(0.0f,
               (box1[3] - box1[1]) + (box2[3] - box2[1]) -
                   (std::max(box2[3], box1[3]) - std::min(box1[1], box2[1])));
  return x_overlap * y_overlap;
}
float IOU(float box1[4], float box2[4]) {
float area1 = Area(box1);
float area2 = Area(box2);
float intersection_area = IntersectionArea(box1, box2);
float union_area = area1 + area2 - intersection_area;
return intersection_area / union_area;
}
// Writes each element of `vec` to `filename`, one per line. Fatal if the
// file cannot be opened for writing.
template <typename T>
void VecToFile(const std::vector<T>& vec, std::string filename) {
  std::ofstream f(filename, std::ios::out);
  if (!f) {
    // BUGFIX: message previously read "<file>not exist!" — an ofstream open
    // failure means the file could not be created/opened for writing.
    LOG(FATAL) << "can not open " << filename << " to write" << std::endl;
  }
  for (size_t i = 0; i < vec.size(); i++) {
    f << vec[i] << std::endl;
  }
  f.close();
}
// Writes `size` elements of `data` to `filename`, one per line. Fatal if the
// file cannot be opened for writing.
template <typename T>
void ArrayToFile(const T* data, int size, std::string filename) {
  std::ofstream f(filename, std::ios::out);
  if (!f) {
    // BUGFIX: message previously read "<file>not exist!" — an ofstream open
    // failure means the file could not be created/opened for writing.
    LOG(FATAL) << "can not open " << filename << " to write" << std::endl;
  }
  // BUGFIX: loop index was size_t compared against a signed int `size`.
  for (int i = 0; i < size; i++) {
    f << data[i] << std::endl;
  }
  f.close();
}
// Dumps a tensor's contents to `file_name`, one value per line (buffered in
// memory first, then written in one shot).
void ToFile(Tensor* tensor, std::string file_name) {
  const int count = tensor->dims().production();
  const auto* data = tensor->mutable_data<float>();
  std::ostringstream buffer;
  for (int i = 0; i < count; ++i) {
    buffer << data[i] << std::endl;
  }
  std::ofstream of(file_name, std::ios::out);
  of << buffer.str();
  of.close();
}
// Fills `tensor` with whitespace-separated floats read from `file_name`.
// The tensor must already be resized; fatal if the file cannot be opened.
void FromFile(Tensor* tensor, std::string file_name) {
  LOG(INFO) << " from file:" << file_name << std::endl;
  std::ifstream f;
  f.open(file_name, std::ios::in);
  if (f.good()) {
    // BUGFIX: index was size_t compared against the signed production().
    for (int64_t i = 0; i < tensor->dims().production(); i++) {
      f >> tensor->mutable_data<float>()[i];
    }
  } else {
    // BUGFIX: message previously rendered as "...open <file>to read" —
    // missing the separating space.
    LOG(FATAL) << "can not open " << file_name << " to read" << std::endl;
  }
  f.close();
}
// Ordering predicate for (score, payload) pairs: true when `lhs` should come
// before `rhs`, i.e. sorts by strictly descending score.
template <typename dtype>
static bool sort_score_pair_descend(const std::pair<float, dtype>& lhs,
                                    const std::pair<float, dtype>& rhs) {
  return lhs.first > rhs.first;
}
// Collects (score, index) pairs for all scores strictly above `threshold`,
// stably sorted by descending score, truncated to at most `top_k` entries
// (top_k < 0 keeps everything).
template <typename dtype>
void get_max_score_index(const dtype* scores,
                         int num,
                         float threshold,
                         int top_k,
                         std::vector<std::pair<dtype, int>>* score_index_vec) {
  //! Generate index score pairs.
  for (int i = 0; i < num; ++i) {
    if (scores[i] > threshold) {
      score_index_vec->push_back(std::make_pair(scores[i], i));
    }
  }
  //! Sort the score pair according to the scores in descending order.
  // BUGFIX: previously sorted with sort_score_pair_descend<int>, whose
  // signature is pair<float, dtype> — it only type-checks against this
  // vector's pair<dtype, int> when dtype == float. Compare generically.
  std::stable_sort(score_index_vec->begin(),
                   score_index_vec->end(),
                   [](const std::pair<dtype, int>& a,
                      const std::pair<dtype, int>& b) {
                     return a.first > b.first;
                   });
  //! Keep top_k scores if needed (cast avoids signed/unsigned comparison;
  //! top_k is known non-negative inside this branch).
  if (top_k > -1 && static_cast<size_t>(top_k) < score_index_vec->size()) {
    score_index_vec->resize(top_k);
  }
}
// Area of a box stored as [xmin, ymin, xmax, ymax].
// Returns 0 for degenerate boxes (xmax < xmin or ymax < ymin). When
// `normalized` is false the box is in pixel coordinates, so each side
// spans one extra unit: (w + 1) * (h + 1).
template <typename dtype>
dtype bbox_size(const dtype* bbox, bool normalized = true) {
  // Degenerate box (e.g. xmax < xmin or ymax < ymin) -> zero area.
  if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) {
    return dtype(0.);
  }
  const dtype w = bbox[2] - bbox[0];
  const dtype h = bbox[3] - bbox[1];
  return normalized ? w * h : (w + 1) * (h + 1);
}
// Jaccard (IoU) overlap of two boxes stored as [xmin, ymin, xmax, ymax]:
// intersection area divided by union area. Returns 0 when the boxes do
// not intersect. Areas come from bbox_size() with its default
// normalized=true behavior.
template <typename dtype>
dtype jaccard_overlap(const dtype* bbox1, const dtype* bbox2) {
  const bool disjoint = bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] ||
                        bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1];
  if (disjoint) {
    return dtype(0.);
  }
  const dtype ix_min = std::max(bbox1[0], bbox2[0]);
  const dtype iy_min = std::max(bbox1[1], bbox2[1]);
  const dtype ix_max = std::min(bbox1[2], bbox2[2]);
  const dtype iy_max = std::min(bbox1[3], bbox2[3]);
  const dtype inter_area = (ix_max - ix_min) * (iy_max - iy_min);
  const dtype union_area =
      bbox_size(bbox1) + bbox_size(bbox2) - inter_area;
  return inter_area / union_area;
}
// Greedy non-maximum suppression over one class of boxes.
// Boxes are stored 4 floats each at bboxes[idx * 4]. Candidates are the
// (at most top_k) indices whose score exceeds score_threshold, visited in
// descending-score order; a candidate is kept unless it overlaps an
// already-kept box by more than the (possibly shrinking) threshold.
// When eta < 1, the threshold is multiplied by eta after each kept box
// while it stays above 0.5 (adaptive NMS).
template <typename dtype>
void apply_nms_fast(const dtype* bboxes,
                    const dtype* scores,
                    int num,
                    float score_threshold,
                    float nms_threshold,
                    float eta,
                    int top_k,
                    std::vector<int>* indices) {
  // Candidate (score, index) pairs, best score first.
  std::vector<std::pair<dtype, int>> candidates;
  get_max_score_index(scores, num, score_threshold, top_k, &candidates);
  indices->clear();
  float adaptive_threshold = nms_threshold;
  while (!candidates.empty()) {
    const int idx = candidates.front().second;
    bool suppressed = false;
    for (size_t k = 0; k < indices->size(); ++k) {
      const int kept_idx = (*indices)[k];
      const float overlap =
          jaccard_overlap(bboxes + idx * 4, bboxes + kept_idx * 4);
      if (overlap > adaptive_threshold) {
        suppressed = true;
        break;
      }
    }
    if (!suppressed) {
      indices->push_back(idx);
    }
    candidates.erase(candidates.begin());
    // Adaptive-NMS: shrink the threshold after each kept box.
    if (!suppressed && eta < 1 && adaptive_threshold > 0.5) {
      adaptive_threshold *= eta;
    }
  }
}
// CPU reference implementation of multiclass NMS, used to validate the MLU
// bridge output in test_multiclass_nms.
//
// param:          op attributes and input tensors (bboxes, scores,
//                 thresholds, top-k limits).
// class_num:      number of classes in the score tensor.
// priors:         number of prior boxes for each image in the batch
//                 (one entry per image).
// share_location: true when all classes share one set of box predictions;
//                 false when each class has its own 4 coordinates per prior.
// result:         output; 6 floats per kept detection:
//                 [label, score, xmin, ymin, xmax, ymax]. When nothing is
//                 kept, result holds the single value -1.
template <typename dtype>
void multiclass_nms_compute_ref(const operators::MulticlassNmsParam& param,
                                int class_num,
                                const std::vector<int>& priors,
                                bool share_location,
                                std::vector<float>* result) {
  int background_id = param.background_label;
  int keep_topk = param.keep_top_k;
  int nms_topk = param.nms_top_k;
  float conf_thresh = param.score_threshold;
  float nms_thresh = param.nms_threshold;
  float nms_eta = param.nms_eta;
  const dtype* bbox_data = param.bboxes->data<const dtype>();
  const dtype* conf_data = param.scores->data<const dtype>();
  (*result).clear();
  // Pass 1: run per-class NMS for every image and record the kept prior
  // indices per class, then optionally trim to keep_topk per image.
  int num_kept = 0;
  std::vector<std::map<int, std::vector<int>>> all_indices;
  // Running offsets (in priors) into the score and box buffers.
  int64_t conf_offset = 0;
  int64_t bbox_offset = 0;
  for (int i = 0; i < priors.size(); ++i) {
    // kept indices for this image, keyed by class label
    std::map<int, std::vector<int>> indices;
    int num_det = 0;
    int num_priors = priors[i];
    // Scores are laid out per image as class_num * num_priors values;
    // boxes as num_priors * 4 (shared) or num_priors * 4 * class_num.
    int conf_idx = class_num * conf_offset;
    int bbox_idx =
        share_location ? bbox_offset * 4 : bbox_offset * 4 * class_num;
    for (int c = 0; c < class_num; ++c) {
      if (c == background_id) {
        // Ignore background class
        continue;
      }
      const dtype* cur_conf_data = conf_data + conf_idx + c * num_priors;
      const dtype* cur_bbox_data = bbox_data + bbox_idx;
      if (!share_location) {
        // Per-class boxes: advance to this class's coordinate block.
        cur_bbox_data += c * num_priors * 4;
      }
      apply_nms_fast(cur_bbox_data,
                     cur_conf_data,
                     num_priors,
                     conf_thresh,
                     nms_thresh,
                     nms_eta,
                     nms_topk,
                     &(indices[c]));
      num_det += indices[c].size();
    }
    if (keep_topk > -1 && num_det > keep_topk) {
      // More detections than keep_topk across all classes: keep only the
      // keep_topk highest-scoring (label, prior index) pairs for this image.
      std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
      for (auto it = indices.begin(); it != indices.end(); ++it) {
        int label = it->first;
        const std::vector<int>& label_indices = it->second;
        for (int j = 0; j < label_indices.size(); ++j) {
          int idx = label_indices[j];
          float score = conf_data[conf_idx + label * num_priors + idx];
          score_index_pairs.push_back(
              std::make_pair(score, std::make_pair(label, idx)));
        }
      }
      // Keep top k results per image.
      std::stable_sort(score_index_pairs.begin(),
                       score_index_pairs.end(),
                       sort_score_pair_descend<std::pair<int, int>>);
      score_index_pairs.resize(keep_topk);
      // Store the new indices.
      std::map<int, std::vector<int>> new_indices;
      for (int j = 0; j < score_index_pairs.size(); ++j) {
        int label = score_index_pairs[j].second.first;
        int idx = score_index_pairs[j].second.second;
        new_indices[label].push_back(idx);
      }
      all_indices.push_back(new_indices);
      num_kept += keep_topk;
    } else {
      all_indices.push_back(indices);
      num_kept += num_det;
    }
    conf_offset += num_priors;
    bbox_offset += num_priors;
  }
  if (num_kept == 0) {
    // No detections at all: signal with a single -1 entry.
    (*result).clear();
    (*result).resize(1);
    (*result)[0] = -1;
    return;
  } else {
    (*result).resize(num_kept * 6);
  }
  // Pass 2: materialize the kept detections into the flat result buffer,
  // 6 floats per detection: [label, score, xmin, ymin, xmax, ymax].
  int count = 0;
  conf_offset = 0;
  bbox_offset = 0;
  for (int i = 0; i < priors.size(); ++i) {
    int num_priors = priors[i];
    int conf_idx = class_num * conf_offset;
    int bbox_idx =
        share_location ? bbox_offset * 4 : bbox_offset * 4 * class_num;
    for (auto it = all_indices[i].begin(); it != all_indices[i].end(); ++it) {
      int label = it->first;
      std::vector<int>& indices = it->second;
      const dtype* cur_conf_data = conf_data + conf_idx + label * num_priors;
      const dtype* cur_bbox_data = bbox_data + bbox_idx;
      if (!share_location) {
        cur_bbox_data += label * num_priors * 4;
      }
      for (int j = 0; j < indices.size(); ++j) {
        int idx = indices[j];
        (*result)[count * 6] = label;
        (*result)[count * 6 + 1] = cur_conf_data[idx];
        for (int k = 0; k < 4; ++k) {
          (*result)[count * 6 + 2 + k] = cur_bbox_data[idx * 4 + k];
        }
        ++count;
      }
    }
    conf_offset += num_priors;
    bbox_offset += num_priors;
  }
}
void test_multiclass_nms(float score_threshold,
int nms_top_k,
int keep_top_k,
float nms_threshold,
bool normalized,
float nms_eta,
int background_label,
int batch_size,
int class_num,
int num_boxes,
int box_size,
int core_num) {
// prepare input&output variables
Scope scope;
std::string bboxes_var_name = "BBoxes";
std::string scores_var_name = "Scores";
std::string out_var_name = "Out";
std::string out_num_var_name =
"nms_out_num"; // must be this name,corespond with
// lite/operators/multiclass_nms_op.cc
auto* bboxes = scope.Var(bboxes_var_name)->GetMutable<Tensor>();
auto* scores = scope.Var(scores_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_num = scope.Var(out_num_var_name)->GetMutable<Tensor>();
std::vector<int64_t> bboxes_shape = {batch_size, num_boxes, box_size};
std::vector<int64_t> scores_shape = {batch_size, class_num, num_boxes};
std::vector<int64_t> out_num_shape = {batch_size};
bboxes->Resize(bboxes_shape);
scores->Resize(scores_shape);
out_num->Resize(out_num_shape);
std::vector<float> bboxes_vec = gen_random_boxes(num_boxes, 1024, 1024);
std::vector<float> scores_vec = gen_random_scores(num_boxes, class_num);
for (size_t i = 1; i < bboxes_vec.size(); i++) {
bboxes->mutable_data<float>()[i] = bboxes_vec[i];
}
for (size_t i = 1; i < scores_vec.size(); i++) {
scores->mutable_data<float>()[i] = scores_vec[i];
}
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("multiclass_nms");
opdesc.SetInput("BBoxes", {bboxes_var_name});
opdesc.SetInput("Scores", {scores_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("background_label", background_label);
opdesc.SetAttr("keep_top_k", keep_top_k);
opdesc.SetAttr("nms_top_k", nms_top_k);
opdesc.SetAttr("score_threshold", score_threshold);
opdesc.SetAttr("nms_threshold", nms_threshold);
opdesc.SetAttr("nms_eta", nms_eta);
opdesc.SetAttr("normalized", normalized);
auto op = CreateOp<operators::MulticlassNmsOpLite>(opdesc, &scope);
// out_ref->CopyDataFrom(*out);
operators::MulticlassNmsParam param;
auto bboxes_name = opdesc.Input("BBoxes").front();
auto scores_name = opdesc.Input("Scores").front();
auto out_name = opdesc.Output("Out").front();
std::vector<std::string> output_arg_names = opdesc.OutputArgumentNames();
param.bboxes = bboxes;
param.scores = scores;
param.out = out;
param.background_label = opdesc.GetAttr<int>("background_label");
param.keep_top_k = opdesc.GetAttr<int>("keep_top_k");
param.nms_top_k = opdesc.GetAttr<int>("nms_top_k");
param.score_threshold = opdesc.GetAttr<float>("score_threshold");
param.nms_threshold = opdesc.GetAttr<float>("nms_threshold");
param.nms_eta = opdesc.GetAttr<float>("nms_eta");
if (opdesc.HasAttr("normalized")) {
param.normalized = opdesc.GetAttr<bool>("normalized");
}
const std::vector<int>& priors = {num_boxes}; // batch_size
std::vector<float> result;
multiclass_nms_compute_ref<float>(param, class_num, priors, true, &result);
// trans
Tensor bboxes_trans;
bboxes_trans.Resize({bboxes->dims()});
transpose(bboxes->mutable_data<float>(),
bboxes_trans.mutable_data<float>(),
{static_cast<int>(bboxes->dims()[0]),
static_cast<int>(bboxes->dims()[1]),
static_cast<int>(bboxes->dims()[2])},
{0, 2, 1});
bboxes->CopyDataFrom(bboxes_trans);
Tensor scores_trans;
scores_trans.Resize({scores->dims()});
transpose(scores->mutable_data<float>(),
scores_trans.mutable_data<float>(),
{static_cast<int>(scores->dims()[0]),
static_cast<int>(scores->dims()[1]),
static_cast<int>(scores->dims()[2])},
{0, 2, 1});
scores->CopyDataFrom(scores_trans);
LaunchOp(
op, {bboxes_var_name, scores_var_name}, {out_var_name, out_num_var_name});
// ToFile(out, "nms_out_mlu_before_trans.txt");
// out trans
Tensor out_trans;
out_trans.Resize({out->dims()});
transpose(out->mutable_data<float>(),
out_trans.mutable_data<float>(),
{static_cast<int>(out->dims()[0]),
static_cast<int>(out->dims()[2]),
static_cast<int>(out->dims()[1])}, // 0 2 1 on mlu
{0, 2, 1});
out->CopyDataFrom(out_trans);
// ToFile(out, "nms_out_mlu.txt");
// ToFile(out_num, "nms_out_num_mlu.txt");
// VecToFile(result, "nms_out_cpu.txt");
// auto out_data = out->mutable_data<float>();
int num_box = out->dims()[1];
int match_count = 0;
std::vector<int> matched_cpu_index;
for (int i = 0; i < num_box; i++) {
float mlu_box[4];
mlu_box[0] = out->mutable_data<float>()[i * 6 + 2];
mlu_box[1] = out->mutable_data<float>()[i * 6 + 3];
mlu_box[2] = out->mutable_data<float>()[i * 6 + 4];
mlu_box[3] = out->mutable_data<float>()[i * 6 + 5];
bool match = false;
for (size_t j = 0; j < num_box; j++) {
// if j th cpu box has matched some mlu box, do not use if to match other
// mlu box
if (std::find(std::begin(matched_cpu_index),
std::end(matched_cpu_index),
j) != std::end(matched_cpu_index)) {
continue;
}
float cpu_box[4];
cpu_box[0] = result[j * 6 + 2];
cpu_box[1] = result[j * 6 + 3];
cpu_box[2] = result[j * 6 + 4];
cpu_box[3] = result[j * 6 + 5];
if (IOU(mlu_box, cpu_box) >= 0.9) {
match = true;
matched_cpu_index.push_back(j);
break;
}
}
if (match) {
match_count += 1;
}
}
EXPECT_NEAR(match_count, num_box, 0);
}
// End-to-end MLU bridge test for multiclass_nms with SSD-like settings
// (80 classes, 22743 priors, keep top 100).
TEST(MLUBridges, multiclass_nms) {
  int background_label = -1;
  int keep_top_k = 100;
  int nms_top_k = 1000;
  float score_threshold = 0.01;
  float nms_threshold = 0.45;
  // Fix: these were declared `int nms_eta = 1` and `bool normalized = 0`;
  // test_multiclass_nms takes a float and a bool, so use the proper types
  // and literals instead of relying on implicit conversions.
  float nms_eta = 1.0f;
  bool normalized = false;
  int batch_size = 1;
  int num_boxes = 22743;
  int class_num = 80;
  int core_num = 4;
  int box_size = 4;
  test_multiclass_nms(score_threshold,
                      nms_top_k,
                      keep_top_k,
                      nms_threshold,
                      normalized,
                      nms_eta,
                      background_label,
                      batch_size,
                      class_num,
                      num_boxes,
                      box_size,
                      core_num);
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(multiclass_nms, kMLU)
...@@ -43,6 +43,9 @@ USE_SUBGRAPH_BRIDGE(flatten, kMLU); ...@@ -43,6 +43,9 @@ USE_SUBGRAPH_BRIDGE(flatten, kMLU);
USE_SUBGRAPH_BRIDGE(flatten2, kMLU); USE_SUBGRAPH_BRIDGE(flatten2, kMLU);
USE_SUBGRAPH_BRIDGE(reshape, kMLU); USE_SUBGRAPH_BRIDGE(reshape, kMLU);
USE_SUBGRAPH_BRIDGE(reshape2, kMLU); USE_SUBGRAPH_BRIDGE(reshape2, kMLU);
USE_SUBGRAPH_BRIDGE(multiclass_nms, kMLU);
USE_SUBGRAPH_BRIDGE(density_prior_box, kMLU);
USE_SUBGRAPH_BRIDGE(box_coder, kMLU);
#ifdef LITE_BUILD_EXTRA #ifdef LITE_BUILD_EXTRA
USE_SUBGRAPH_BRIDGE(gather, kMLU); USE_SUBGRAPH_BRIDGE(gather, kMLU);
USE_SUBGRAPH_BRIDGE(lrn, kMLU) USE_SUBGRAPH_BRIDGE(lrn, kMLU)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#pragma once #pragma once
#include <cnml.h> #include <cnml.h>
#include <cnplugin.h>
#include <cnrt.h> #include <cnrt.h>
#include <memory> #include <memory>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册