未验证 提交 292f52cc 编写于 作者: myq406450149's avatar myq406450149 提交者: GitHub

change box_coder. test=develop (#4107)

上级 17f00635
......@@ -20,69 +20,214 @@ namespace lite {
namespace arm {
namespace math {
void box_coder(lite::Tensor* proposals,
const lite::Tensor* anchors,
const lite::Tensor* variances,
const lite::Tensor* bbox_deltas,
const std::string code_type,
bool box_normalized,
int axis) {
if (code_type == "decode_center_size") {
float normalized = !box_normalized ? 1.f : 0;
const float* anchor_data = anchors->data<float>();
const float* bbox_deltas_data = bbox_deltas->data<float>();
float* proposals_data = proposals->mutable_data<float>();
const float* variances_data = variances->data<float>();
int N = bbox_deltas->dims()[0];
int M = bbox_deltas->dims()[1];
int len = bbox_deltas->dims()[2];
for (int64_t row_id = 0; row_id < N; ++row_id) {
for (int64_t col_id = 0; col_id < M; ++col_id) {
size_t offset = row_id * M * len + col_id * len;
int prior_box_offset = axis == 0 ? col_id * len : row_id * len;
int var_offset = axis == 0 ? col_id * len : row_id * len;
auto anchor_data_tmp = anchor_data + prior_box_offset;
auto bbox_deltas_data_tmp = bbox_deltas_data + offset;
auto proposals_data_tmp = proposals_data + offset;
auto anchor_width =
anchor_data_tmp[2] - anchor_data_tmp[0] + normalized;
auto anchor_height =
anchor_data_tmp[3] - anchor_data_tmp[1] + normalized;
auto anchor_center_x = anchor_data_tmp[0] + 0.5 * anchor_width;
auto anchor_center_y = anchor_data_tmp[1] + 0.5 * anchor_height;
float bbox_center_x = 0, bbox_center_y = 0;
float bbox_width = 0, bbox_height = 0;
auto variances_data_tmp = variances_data + var_offset;
bbox_center_x =
variances_data_tmp[0] * bbox_deltas_data_tmp[0] * anchor_width +
anchor_center_x;
bbox_center_y =
variances_data_tmp[1] * bbox_deltas_data_tmp[1] * anchor_height +
anchor_center_y;
bbox_width = std::exp(variances_data_tmp[2] * bbox_deltas_data_tmp[2]) *
anchor_width;
bbox_height =
std::exp(variances_data_tmp[3] * bbox_deltas_data_tmp[3]) *
anchor_height;
proposals_data_tmp[0] = bbox_center_x - bbox_width / 2;
proposals_data_tmp[1] = bbox_center_y - bbox_height / 2;
proposals_data_tmp[2] = bbox_center_x + bbox_width / 2 - normalized;
proposals_data_tmp[3] = bbox_center_y + bbox_height / 2 - normalized;
void decode_bbox_center_variance_kernel(const int batch_num,
const float* loc_data,
const float* prior_data,
const float* variance,
const int num_priors,
float* bbox_data) {
int cnt = num_priors / 4;
//! vprior 0: xmin, 1: ymin, 2: xmax, 3: ymax
//! vloc 0: xmin, 1: ymin, 2: xmax, 3: ymax
//! vvar
float32x4_t vhalf = vdupq_n_f32(0.5f);
int len_batch = num_priors * 4;
for (int n = 0; n < batch_num; ++n) {
const float* ptr_loc_batch = loc_data + n * len_batch;
float* ptr_bbox_batch = bbox_data + n * len_batch;
#pragma omp parallel for
for (int i = 0; i < cnt; ++i) {
int idx = i * 16;
const float* ptr_loc = ptr_loc_batch + idx;
const float* ptr_prior = prior_data + idx;
float* ptr_bbox = ptr_bbox_batch + idx;
float32x4x4_t vprior = vld4q_f32(ptr_prior);
float32x4x4_t vloc = vld4q_f32(ptr_loc);
float32x4_t vprior_width = vsubq_f32(vprior.val[2], vprior.val[0]);
float32x4_t vprior_height = vsubq_f32(vprior.val[3], vprior.val[1]);
float32x4_t vprior_cx =
vmulq_f32(vaddq_f32(vprior.val[0], vprior.val[2]), vhalf);
float32x4_t vprior_cy =
vmulq_f32(vaddq_f32(vprior.val[1], vprior.val[3]), vhalf);
float32x4_t vdec_bbx_cx =
vaddq_f32(vmulq_f32(vloc.val[0], vprior_width), vprior_cx);
float32x4_t vdec_bbx_cy =
vaddq_f32(vmulq_f32(vloc.val[1], vprior_height), vprior_cy);
float32x4_t vdec_bbx_w = exp_ps(vloc.val[2]);
float32x4_t vdec_bbx_h = exp_ps(vloc.val[3]);
vprior_width = vmulq_f32(vprior_width, vhalf);
vprior_height = vmulq_f32(vprior_height, vhalf);
vdec_bbx_w = vmulq_f32(vdec_bbx_w, vprior_width);
vdec_bbx_h = vmulq_f32(vdec_bbx_h, vprior_height);
vloc.val[0] = vsubq_f32(vdec_bbx_cx, vdec_bbx_w);
vloc.val[1] = vsubq_f32(vdec_bbx_cy, vdec_bbx_h);
vloc.val[2] = vaddq_f32(vdec_bbx_cx, vdec_bbx_w);
vloc.val[3] = vaddq_f32(vdec_bbx_cy, vdec_bbx_h);
vst4q_f32(ptr_bbox, vloc);
}
#pragma omp parallel for
for (int i = cnt * 4; i < num_priors; i++) {
int idx = i * 4;
float p_xmin = prior_data[idx];
float p_ymin = prior_data[idx + 1];
float p_xmax = prior_data[idx + 2];
float p_ymax = prior_data[idx + 3];
float prior_width = p_xmax - p_xmin;
float prior_height = p_ymax - p_ymin;
float prior_center_x = (p_xmin + p_xmax) / 2.f;
float prior_center_y = (p_ymin + p_ymax) / 2.f;
float xmin = ptr_loc_batch[idx];
float ymin = ptr_loc_batch[idx + 1];
float xmax = ptr_loc_batch[idx + 2];
float ymax = ptr_loc_batch[idx + 3];
//! variance is encoded in target, we simply need to retore the offset
//! predictions.
float decode_bbox_center_x = xmin * prior_width + prior_center_x;
float decode_bbox_center_y = ymin * prior_height + prior_center_y;
float decode_bbox_width = expf(xmax) * prior_width;
float decode_bbox_height = expf(ymax) * prior_height;
ptr_bbox_batch[idx] = decode_bbox_center_x - decode_bbox_width / 2.f;
ptr_bbox_batch[idx + 1] = decode_bbox_center_y - decode_bbox_height / 2.f;
ptr_bbox_batch[idx + 2] = decode_bbox_center_x + decode_bbox_width / 2.f;
ptr_bbox_batch[idx + 3] = decode_bbox_center_y + decode_bbox_height / 2.f;
}
}
}
void decode_bbox_center_no_variance_kernel(const int batch_num,
const float* loc_data,
const float* prior_data,
const float* variance,
const int num_priors,
const bool normalized,
float* bbox_data) {
int cnt = num_priors / 4;
//! vprior 0: xmin, 1: ymin, 2: xmax, 3: ymax
//! vloc 0: xmin, 1: ymin, 2: xmax, 3: ymax
//! vvar
float32x4_t vhalf = vdupq_n_f32(0.5f);
float norm_value = (normalized == false);
float32x4_t vnormalized = vdupq_n_f32(norm_value);
int len_batch = num_priors * 4;
for (int n = 0; n < batch_num; ++n) {
const float* ptr_loc_batch = loc_data + n * len_batch;
float* ptr_bbox_batch = bbox_data + n * len_batch;
#pragma omp parallel for
for (int i = 0; i < cnt; ++i) {
int idx = i * 16;
const float* ptr_loc = ptr_loc_batch + idx;
const float* ptr_prior = prior_data + idx;
const float* ptr_var = variance + idx;
float* ptr_bbox = ptr_bbox_batch + idx;
float32x4x4_t vprior = vld4q_f32(ptr_prior);
float32x4x4_t vloc = vld4q_f32(ptr_loc);
float32x4x4_t vvar = vld4q_f32(ptr_var);
float32x4_t vprior_width1 = vsubq_f32(vprior.val[2], vprior.val[0]);
float32x4_t vprior_height1 = vsubq_f32(vprior.val[3], vprior.val[1]);
float32x4_t vprior_width = vaddq_f32(vprior_width1, vnormalized);
float32x4_t vprior_height = vaddq_f32(vprior_height1, vnormalized);
float32x4_t vprior_cx =
vaddq_f32(vprior.val[0], vmulq_f32(vprior_width, vhalf));
float32x4_t vprior_cy =
vaddq_f32(vprior.val[1], vmulq_f32(vprior_height, vhalf));
vloc.val[0] = vmulq_f32(vloc.val[0], vvar.val[0]);
vloc.val[1] = vmulq_f32(vloc.val[1], vvar.val[1]);
vloc.val[2] = vmulq_f32(vloc.val[2], vvar.val[2]);
vloc.val[3] = vmulq_f32(vloc.val[3], vvar.val[3]);
float32x4_t vdec_bbx_cx =
vaddq_f32(vmulq_f32(vloc.val[0], vprior_width), vprior_cx);
float32x4_t vdec_bbx_cy =
vaddq_f32(vmulq_f32(vloc.val[1], vprior_height), vprior_cy);
float32x4_t vdec_bbx_w = exp_ps(vloc.val[2]);
float32x4_t vdec_bbx_h = exp_ps(vloc.val[3]);
vprior_width = vmulq_f32(vprior_width, vhalf);
vprior_height = vmulq_f32(vprior_height, vhalf);
vdec_bbx_w = vmulq_f32(vdec_bbx_w, vprior_width);
vdec_bbx_h = vmulq_f32(vdec_bbx_h, vprior_height);
vloc.val[0] = vsubq_f32(vdec_bbx_cx, vdec_bbx_w);
vloc.val[1] = vsubq_f32(vdec_bbx_cy, vdec_bbx_h);
vloc.val[2] = vaddq_f32(vdec_bbx_cx, vsubq_f32(vdec_bbx_w, vnormalized));
vloc.val[3] = vaddq_f32(vdec_bbx_cy, vsubq_f32(vdec_bbx_h, vnormalized));
vst4q_f32(ptr_bbox, vloc);
}
#pragma omp parallel for
for (int i = cnt * 4; i < num_priors; i++) {
int idx = i * 4;
float p_xmin = prior_data[idx];
float p_ymin = prior_data[idx + 1];
float p_xmax = prior_data[idx + 2];
float p_ymax = prior_data[idx + 3];
float prior_width = p_xmax - p_xmin + norm_value;
float prior_height = p_ymax - p_ymin + norm_value;
float prior_center_x = p_xmin + prior_width / 2.f;
float prior_center_y = p_ymin + prior_height / 2.f;
float xmin = ptr_loc_batch[idx];
float ymin = ptr_loc_batch[idx + 1];
float xmax = ptr_loc_batch[idx + 2];
float ymax = ptr_loc_batch[idx + 3];
//! variance is encoded in target, we simply need to retore the offset
//! predictions.
float decode_bbox_center_x =
variance[idx] * xmin * prior_width + prior_center_x;
float decode_bbox_center_y =
variance[idx + 1] * ymin * prior_height + prior_center_y;
float decode_bbox_width = expf(variance[idx + 2] * xmax) * prior_width;
float decode_bbox_height = expf(variance[idx + 3] * ymax) * prior_height;
ptr_bbox_batch[idx] = decode_bbox_center_x - decode_bbox_width / 2.f;
ptr_bbox_batch[idx + 1] = decode_bbox_center_y - decode_bbox_height / 2.f;
ptr_bbox_batch[idx + 2] =
decode_bbox_center_x + decode_bbox_width / 2.f - norm_value;
ptr_bbox_batch[idx + 3] =
decode_bbox_center_y + decode_bbox_height / 2.f - norm_value;
}
}
} else if (code_type == "encode_center_size") {
LOG(FATAL) << "not implemented type: " << code_type;
}
void decode_bboxes(const int batch_num,
const float* loc_data,
const float* prior_data,
const float* variance_data,
const std::string code_type,
const bool normalized,
const int num_priors,
float* bbox_data) {
if (code_type == "encode_center_size") {
decode_bbox_center_variance_kernel(
batch_num, loc_data, prior_data, variance_data, num_priors, bbox_data);
} else if (code_type == "decode_center_size") {
decode_bbox_center_no_variance_kernel(batch_num,
loc_data,
prior_data,
variance_data,
num_priors,
normalized,
bbox_data);
} else {
LOG(FATAL) << "not supported type: " << code_type;
LOG(FATAL) << "box_coder don't support this code_type: " << code_type;
}
}
......
......@@ -22,13 +22,14 @@ namespace lite {
namespace arm {
namespace math {
void box_coder(lite::Tensor* proposals,
const lite::Tensor* anchors,
const lite::Tensor* variances,
const lite::Tensor* bbox_deltas,
void decode_bboxes(const int batch_num,
const float* loc_data,
const float* prior_data,
const float* variance_data,
const std::string code_type,
bool box_normalized,
int axis);
const bool normalized,
const int num_priors,
float* bbox_data);
} // namespace math
} // namespace arm
......
......@@ -22,156 +22,7 @@ namespace lite {
namespace kernels {
namespace arm {
void EncodeCenterSize(const Tensor* target_box,
const Tensor* prior_box,
const Tensor* prior_box_var,
const bool normalized,
const std::vector<float> variance,
float* output) {
int64_t row = target_box->dims()[0];
int64_t col = prior_box->dims()[0];
int64_t len = prior_box->dims()[1];
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
auto* target_box_data = target_box->data<float>();
auto* prior_box_data = prior_box->data<float>();
int64_t offset = i * col * len + j * len;
float prior_box_width = prior_box_data[j * len + 2] -
prior_box_data[j * len] + (normalized == false);
float prior_box_height = prior_box_data[j * len + 3] -
prior_box_data[j * len + 1] +
(normalized == false);
float prior_box_center_x = prior_box_data[j * len] + prior_box_width / 2;
float prior_box_center_y =
prior_box_data[j * len + 1] + prior_box_height / 2;
float target_box_center_x =
(target_box_data[i * len + 2] + target_box_data[i * len]) / 2;
float target_box_center_y =
(target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2;
float target_box_width = target_box_data[i * len + 2] -
target_box_data[i * len] + (normalized == false);
float target_box_height = target_box_data[i * len + 3] -
target_box_data[i * len + 1] +
(normalized == false);
output[offset] =
(target_box_center_x - prior_box_center_x) / prior_box_width;
output[offset + 1] =
(target_box_center_y - prior_box_center_y) / prior_box_height;
output[offset + 2] =
std::log(std::fabs(target_box_width / prior_box_width));
output[offset + 3] =
std::log(std::fabs(target_box_height / prior_box_height));
}
}
if (prior_box_var) {
const float* prior_box_var_data = prior_box_var->data<float>();
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
for (int k = 0; k < 4; ++k) {
int64_t offset = i * col * len + j * len;
int64_t prior_var_offset = j * len;
output[offset + k] /= prior_box_var_data[prior_var_offset + k];
}
}
}
} else if (!(variance.empty())) {
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
for (int k = 0; k < 4; ++k) {
int64_t offset = i * col * len + j * len;
output[offset + k] /= static_cast<float>(variance[k]);
}
}
}
}
}
template <int axis, int var_size>
void DecodeCenterSize(const Tensor* target_box,
const Tensor* prior_box,
const Tensor* prior_box_var,
const bool normalized,
std::vector<float> variance,
float* output) {
int64_t row = target_box->dims()[0];
int64_t col = target_box->dims()[1];
int64_t len = target_box->dims()[2];
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
auto* target_box_data = target_box->data<float>();
auto* prior_box_data = prior_box->data<float>();
float var_data[4] = {1., 1., 1., 1.};
float* var_ptr = var_data;
int64_t offset = i * col * len + j * len;
int64_t prior_box_offset = axis == 0 ? j * len : i * len;
float prior_box_width = prior_box_data[prior_box_offset + 2] -
prior_box_data[prior_box_offset] +
(normalized == false);
float prior_box_height = prior_box_data[prior_box_offset + 3] -
prior_box_data[prior_box_offset + 1] +
(normalized == false);
float prior_box_center_x =
prior_box_data[prior_box_offset] + prior_box_width / 2;
float prior_box_center_y =
prior_box_data[prior_box_offset + 1] + prior_box_height / 2;
float target_box_center_x = 0, target_box_center_y = 0;
float target_box_width = 0, target_box_height = 0;
int64_t prior_var_offset = axis == 0 ? j * len : i * len;
if (var_size == 2) {
std::memcpy(var_ptr,
prior_box_var->data<float>() + prior_var_offset,
4 * sizeof(float));
} else if (var_size == 1) {
var_ptr = reinterpret_cast<float*>(variance.data());
}
float box_var_x = *var_ptr;
float box_var_y = *(var_ptr + 1);
float box_var_w = *(var_ptr + 2);
float box_var_h = *(var_ptr + 3);
target_box_center_x =
box_var_x * target_box_data[offset] * prior_box_width +
prior_box_center_x;
target_box_center_y =
box_var_y * target_box_data[offset + 1] * prior_box_height +
prior_box_center_y;
target_box_width =
std::exp(box_var_w * target_box_data[offset + 2]) * prior_box_width;
target_box_height =
std::exp(box_var_h * target_box_data[offset + 3]) * prior_box_height;
output[offset] = target_box_center_x - target_box_width / 2;
output[offset + 1] = target_box_center_y - target_box_height / 2;
output[offset + 2] =
target_box_center_x + target_box_width / 2 - (normalized == false);
output[offset + 3] =
target_box_center_y + target_box_height / 2 - (normalized == false);
}
}
}
void BoxCoderCompute::Run() {
/*
auto& param = Param<operators::BoxCoderParam>();
int axis = param.axis;
bool box_normalized = param.box_normalized;
std::string code_type = param.code_type;
lite::arm::math::box_coder(param.proposals,
param.prior_box,
param.prior_box_var,
param.target_box,
code_type,
box_normalized,
axis);
*/
auto& param = Param<operators::BoxCoderParam>();
auto* prior_box = param.prior_box;
auto* prior_box_var = param.prior_box_var;
......@@ -191,36 +42,20 @@ void BoxCoderCompute::Run() {
output_box->Resize({row, col, len});
auto* output = output_box->mutable_data<float>();
if (code_type == "encode_center_size") {
EncodeCenterSize(
target_box, prior_box, prior_box_var, normalized, variance, output);
} else if (code_type == "decode_center_size") {
if (prior_box_var) {
if (axis == 0) {
DecodeCenterSize<0, 2>(
target_box, prior_box, prior_box_var, normalized, variance, output);
} else {
DecodeCenterSize<1, 2>(
target_box, prior_box, prior_box_var, normalized, variance, output);
}
} else if (!(variance.empty())) {
if (axis == 0) {
DecodeCenterSize<0, 1>(
target_box, prior_box, prior_box_var, normalized, variance, output);
} else {
DecodeCenterSize<1, 1>(
target_box, prior_box, prior_box_var, normalized, variance, output);
}
} else {
if (axis == 0) {
DecodeCenterSize<0, 0>(
target_box, prior_box, prior_box_var, normalized, variance, output);
} else {
DecodeCenterSize<1, 0>(
target_box, prior_box, prior_box_var, normalized, variance, output);
}
}
}
int num = target_box->dims()[0];
const float* loc_data = target_box->data<float>();
const float* prior_data = prior_box->data<float>();
const float* variance_data = prior_box_var->data<float>();
int _num_priors = prior_box->numel() / 4;
bool _share_location = true;
lite::arm::math::decode_bboxes(num,
loc_data,
prior_data,
variance_data,
code_type,
normalized,
_num_priors,
output);
}
} // namespace arm
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册