Commit 433d74a7 authored by liuqi

Add PSROIAlign op.

Parent cd859dc9
......@@ -90,6 +90,7 @@ extern void Register_Eltwise(OperatorRegistry *op_registry);
extern void Register_FullyConnected(OperatorRegistry *op_registry);
extern void Register_Slice(OperatorRegistry *op_registry);
extern void Register_Proposal(OperatorRegistry *op_registry);
extern void Register_PSROIAlign(OperatorRegistry *op_registry);
} // namespace ops
......@@ -120,6 +121,7 @@ OperatorRegistry::OperatorRegistry() {
ops::Register_FullyConnected(this);
ops::Register_Slice(this);
ops::Register_Proposal(this);
ops::Register_PSROIAlign(this);
}
} // namespace mace
......@@ -42,33 +42,33 @@ std::vector<std::vector<float>> GenerateAnchors(
std::vector<std::vector<float>> anchors(scales_size * ratios_size,
std::vector<float>(4));
int idx = 0;
std::vector<float> tmp_anchor(4);
#pragma omp parallel for
for (size_t ratio_idx = 0; ratio_idx < ratios_size; ++ratio_idx) {
float ws = ::roundf(::sqrtf(size / ratios[ratio_idx]));
float hs = ::roundf(ws * ratios[ratio_idx]);
std::vector<float> tmp_anchor(4);
tmp_anchor[0] = base_window[2] - (ws - 1) / 2;
tmp_anchor[1] = base_window[3] - (hs - 1) / 2;
tmp_anchor[2] = base_window[2] + (ws - 1) / 2;
tmp_anchor[3] = base_window[3] + (hs - 1) / 2;
auto window = WHCenters(tmp_anchor);
for (size_t scale_idx = 0; scale_idx < scales_size; ++scale_idx) {
const size_t idx = ratio_idx * scales_size + scale_idx;
ws = window[0] * scales[scale_idx];
hs = window[1] * scales[scale_idx];
anchors[idx][0] = window[2] - (ws - 1) / 2;
anchors[idx][1] = window[3] - (hs - 1) / 2;
anchors[idx][2] = window[2] + (ws - 1) / 2;
anchors[idx][3] = window[3] + (hs - 1) / 2;
idx++;
}
}
return anchors;
}
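For reference, the anchor enumeration above follows the usual Faster R-CNN recipe: each ratio reshapes the base window while keeping its area, and each scale then grows the reshaped window around the same center. A minimal standalone sketch of the same math (assuming WHCenters returns {width, height, center_x, center_y} for a corner-form box, which is not shown in this diff):

// Sketch: enumerate ratio/scale anchors around a square base window of
// side base_size; boxes are corner-form {x1, y1, x2, y2}, pixel-inclusive.
#include <cmath>
#include <vector>

std::vector<std::vector<float>> SketchAnchors(
    float base_size,
    const std::vector<float> &ratios,
    const std::vector<float> &scales) {
  const float cx = (base_size - 1) / 2;  // center of the base window
  const float cy = (base_size - 1) / 2;
  std::vector<std::vector<float>> anchors;
  for (float ratio : ratios) {
    // Reshape the base window to this aspect ratio, preserving its area.
    const float ws = std::round(std::sqrt(base_size * base_size / ratio));
    const float hs = std::round(ws * ratio);
    for (float scale : scales) {
      const float w = ws * scale;
      const float h = hs * scale;
      anchors.push_back({cx - (w - 1) / 2, cy - (h - 1) / 2,
                         cx + (w - 1) / 2, cy + (h - 1) / 2});
    }
  }
  return anchors;
}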
std::vector<int> nms(const float *bboxes_ptr,
const float *scores_ptr,
const index_t num_bboxes,
const float thresh) {
const float thresh,
const int post_nms_top_n) {
std::vector<int> keep;
std::vector<int> suppressed(num_bboxes, 0);
......@@ -79,33 +79,29 @@ std::vector<int> nms(const float *bboxes_ptr,
(bboxes_ptr[idx + 3] - bboxes_ptr[idx + 1] + 1);
}
std::vector<int> order(num_bboxes, 0);
iota(order.begin(), order.end(), 0);
for (int i = 0; i < num_bboxes; ++i) {
if (suppressed[i] == 1) continue;
keep.push_back(i);
if (keep.size() >= post_nms_top_n) break;
int coord_idx = i << 2;
const float x1 = bboxes_ptr[coord_idx];
const float y1 = bboxes_ptr[coord_idx + 1];
const float x2 = bboxes_ptr[coord_idx + 2];
const float y2 = bboxes_ptr[coord_idx + 3];
const float area1 = areas[i];
for (int j = i + 1; j < num_bboxes; ++j) {
if (suppressed[j] == 1) continue;
float x1, y1, x2, y2, area1, iou;
for (index_t i = 0; i < num_bboxes; ++i) {
int idx = order[i];
if (suppressed[idx] == 1) continue;
keep.push_back(idx);
int coord_idx = idx << 2;
x1 = bboxes_ptr[coord_idx];
y1 = bboxes_ptr[coord_idx + 1];
x2 = bboxes_ptr[coord_idx + 2];
y2 = bboxes_ptr[coord_idx + 3];
area1 = areas[idx];
for (index_t j = i + 1; j < num_bboxes; ++j) {
const int other_idx = order[j];
if (suppressed[other_idx] == 1) continue;
coord_idx = other_idx << 2;
iou = std::max<float>(0.0,
std::min(x2, bboxes_ptr[coord_idx + 2]) -
std::max(x1, bboxes_ptr[coord_idx]) + 1)
coord_idx = j << 2;
const float iou =
std::max<float>(0.0,
std::min(x2, bboxes_ptr[coord_idx + 2]) -
std::max(x1, bboxes_ptr[coord_idx]) + 1)
* std::max<float>(0.0,
std::min(y2, bboxes_ptr[coord_idx + 3]) -
std::max(y1, bboxes_ptr[coord_idx + 1]) + 1);
if ((iou / (area1 + areas[other_idx] - iou)) >= thresh) {
suppressed[other_idx] = 1;
std::min(y2, bboxes_ptr[coord_idx + 3]) -
std::max(y1, bboxes_ptr[coord_idx + 1]) + 1);
if ((iou / (area1 + areas[j] - iou)) >= thresh) {
suppressed[j] = 1;
}
}
}
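The suppression test above boils down to a standard intersection-over-union check between the current box and every later box. A minimal sketch of that computation for two boxes (the +1 terms match the pixel-inclusive corner convention used above):

// Sketch: IoU of two corner-form boxes {x1, y1, x2, y2}, pixel-inclusive.
#include <algorithm>

float BoxIoU(const float a[4], const float b[4]) {
  const float inter_w =
      std::max(0.0f, std::min(a[2], b[2]) - std::max(a[0], b[0]) + 1);
  const float inter_h =
      std::max(0.0f, std::min(a[3], b[3]) - std::max(a[1], b[1]) + 1);
  const float inter = inter_w * inter_h;
  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}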
......@@ -140,8 +136,8 @@ struct ProposalFunctor {
MACE_CHECK((rpn_cls_prob->dim(3) / 2 == rpn_bbox_pred->dim(3) / 4) &&
(rpn_cls_prob->dim(3) / 2 == anchors_.size()));
const float *img_info = img_info_tensor->data<float>();
const index_t im_height = img_info[0] - 1;
const index_t im_width = img_info[1] - 1;
const int im_height = static_cast<int>(img_info[0] - 1);
const int im_width = static_cast<int>(img_info[1] - 1);
const index_t feat_height = rpn_cls_prob->dim(1);
const index_t feat_width = rpn_cls_prob->dim(2);
const int anchors_size = anchors_.size();
......@@ -150,35 +146,35 @@ struct ProposalFunctor {
std::vector<std::vector<float>> proposals(
anchors_size * feat_height * feat_width,
std::vector<float>(4));
int shift_w, shift_h;
int sanc_idx = 0;
#pragma omp parallel for collapse(3)
for (int h_idx = 0; h_idx < feat_height; ++h_idx) {
shift_h = h_idx * feat_stride_;
for (int w_idx = 0; w_idx < feat_width; ++w_idx) {
shift_w = w_idx * feat_stride_;
for (int a_idx = 0; a_idx < anchors_size; ++a_idx) {
const int shift_h = h_idx * feat_stride_;
const int shift_w = w_idx * feat_stride_;
const index_t sanc_idx = (h_idx * feat_width + w_idx) * anchors_size
+ a_idx;
proposals[sanc_idx][0] = anchors_[a_idx][0] + shift_w;
proposals[sanc_idx][1] = anchors_[a_idx][1] + shift_h;
proposals[sanc_idx][2] = anchors_[a_idx][2] + shift_w;
proposals[sanc_idx][3] = anchors_[a_idx][3] + shift_h;
sanc_idx++;
}
}
}
// Convert anchors into proposals via bbox transformations
// 2. clip predicted boxes to image
std::vector<int> keep;
const index_t min_size = min_size_ * img_info[2];
// 3. remove predicted boxes with either height or width < threshold
// (NOTE: convert min_size to input image scale stored in im_info[2])
const float *bbox_deltas = rpn_bbox_pred->data<float>();
sanc_idx = 0;
#pragma omp parallel for collapse(3)
for (int h_idx = 0; h_idx < feat_height; ++h_idx) {
for (int w_idx = 0; w_idx < feat_width; ++w_idx) {
for (int a_idx = 0; a_idx < anchors_size; ++a_idx) {
float width = proposals[sanc_idx][2] - proposals[sanc_idx][0] + 1;
float height = proposals[sanc_idx][3] - proposals[sanc_idx][1] + 1;
const int sanc_idx = (h_idx * feat_width + w_idx) * anchors_size
+ a_idx;
const float width = proposals[sanc_idx][2] -
proposals[sanc_idx][0] + 1;
const float height = proposals[sanc_idx][3] -
proposals[sanc_idx][1] + 1;
int delta_offset = sanc_idx * 4;
float pred_ctr_x = bbox_deltas[delta_offset + 0] * width +
(proposals[sanc_idx][0] + width / 2);
......@@ -200,12 +196,25 @@ struct ProposalFunctor {
std::min<float>(pred_ctr_y + pred_h / 2, im_height),
0);
width = proposals[sanc_idx][2] - proposals[sanc_idx][0] + 1;
height = proposals[sanc_idx][3] - proposals[sanc_idx][1] + 1;
}
}
}
// 3. remove predicted boxes with either height or width < threshold
// (NOTE: convert min_size to input image scale stored in im_info[2])
std::vector<int> keep;
const float min_size = min_size_ * img_info[2];
for (int h_idx = 0; h_idx < feat_height; ++h_idx) {
for (int w_idx = 0; w_idx < feat_width; ++w_idx) {
for (int a_idx = 0; a_idx < anchors_size; ++a_idx) {
const int sanc_idx = (h_idx * feat_width + w_idx) * anchors_size
+ a_idx;
const float width = proposals[sanc_idx][2]
- proposals[sanc_idx][0] + 1;
const float height = proposals[sanc_idx][3]
- proposals[sanc_idx][1] + 1;
if (width >= min_size && height >= min_size) {
keep.push_back(sanc_idx);
}
sanc_idx++;
}
}
}
......@@ -227,6 +236,7 @@ struct ProposalFunctor {
int size = std::min<int>(pre_nms_top_n_, keep.size());
std::vector<float> nms_scores(size, 0);
std::vector<float> nms_proposals((size << 2), 0);
#pragma omp parallel for
for (int i = 0; i < size; ++i) {
nms_scores[i] = scores[score_idx_func(keep[i])];
nms_proposals[i << 2] = proposals[keep[i]][0];
......@@ -238,15 +248,18 @@ struct ProposalFunctor {
/* 6. apply nms (e.g. threshold = 0.7)
7. take after_nms_topN (e.g. 300)
8. return the top proposals (-> RoIs top) */
auto nms_result = nms(nms_proposals.data(), nms_scores.data(),
nms_scores.size(), thresh_);
auto nms_result = nms(nms_proposals.data(),
nms_scores.size(),
thresh_,
post_nms_top_n_);
// Output rois blob
// Our RPN implementation only supports a single input image, so all
// batch inds are 0
size = std::min<int>(post_nms_top_n_, nms_result.size());
output->Resize({1, 1, size, 5});
size = static_cast<int>(nms_result.size());
output->Resize({size, 1, 1, 5});
auto output_ptr = output->mutable_data<float>();
#pragma omp parallel for
for (int i = 0; i < size; ++i) {
const int out_idx = i * 5;
const int nms_idx = nms_result[i] * 4;
......
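The regression step in this functor applies the usual Faster R-CNN delta parameterization: the first two deltas shift the anchor center in proportion to its width and height (visible in the pred_ctr_x line above), and the remaining two, in the lines this diff truncates, are assumed to scale the size through an exponential. A minimal sketch of that transform for a single box, under that assumption:

// Sketch: apply one (dx, dy, dw, dh) delta to a corner-form box
// {x1, y1, x2, y2}; the exp() size terms follow the standard convention
// and are assumed here, not copied from this diff.
#include <cmath>

void ApplyBBoxDelta(const float delta[4], float box[4]) {
  const float w = box[2] - box[0] + 1;
  const float h = box[3] - box[1] + 1;
  const float ctr_x = box[0] + w / 2;
  const float ctr_y = box[1] + h / 2;
  const float pred_ctr_x = delta[0] * w + ctr_x;
  const float pred_ctr_y = delta[1] * h + ctr_y;
  const float pred_w = std::exp(delta[2]) * w;
  const float pred_h = std::exp(delta[3]) * h;
  box[0] = pred_ctr_x - pred_w / 2;
  box[1] = pred_ctr_y - pred_h / 2;
  box[2] = pred_ctr_x + pred_w / 2;
  box[3] = pred_ctr_y + pred_h / 2;
}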
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_PSROI_ALIGN_H_
#define MACE_KERNELS_PSROI_ALIGN_H_
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
struct PSROIAlignFunctor {
PSROIAlignFunctor(const T spatial_scale,
const int output_dim,
const int group_size) :
spatial_scale_(spatial_scale),
output_dim_(output_dim),
group_size_(group_size) {}
void operator()(const Tensor *input,
const Tensor *rois,
Tensor *output,
StatsFuture *future) {
const int height = static_cast<int>(input->dim(1));
const int width = static_cast<int>(input->dim(2));
const int channels = static_cast<int>(input->dim(3));
const int pooled_height = group_size_;
const int pooled_width = group_size_;
const T *input_ptr = input->data<T>();
const T *rois_ptr = rois->data<T>();
// Number of ROIs
const int num_rois = rois->dim(0);
const int batch_size = input->dim(0);
output->Resize({num_rois, pooled_height, pooled_width, output_dim_});
T *output_ptr = output->mutable_data<T>();
for (int n = 0; n < num_rois; ++n) {
int roi_batch_ind = rois_ptr[0];
T roi_start_w =
static_cast<T>(rois_ptr[1]) * spatial_scale_;
T roi_start_h =
static_cast<T>(rois_ptr[2]) * spatial_scale_;
T roi_end_w =
static_cast<T>(rois_ptr[3] + 1.) * spatial_scale_;
T roi_end_h =
static_cast<T>(rois_ptr[4] + 1.) * spatial_scale_;
MACE_CHECK(roi_batch_ind >= 0);
MACE_CHECK(roi_batch_ind < batch_size);
// Force too small ROIs to be 1x1
T roi_width = std::max(roi_end_w - roi_start_w, static_cast<T>(0.1));
T roi_height = std::max(roi_end_h - roi_start_h, static_cast<T>(0.1));
// Compute w and h at bottom
T bin_size_h = roi_height / static_cast<T>(pooled_height);
T bin_size_w = roi_width / static_cast<T>(pooled_width);
const T *batch_data = input_ptr +
roi_batch_ind * height * width * channels;
std::vector<T> vhstart, vwstart, vhend, vwend;
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
T hstart = static_cast<T>(ph) * bin_size_h
+ roi_start_h;
T wstart = static_cast<T>(pw) * bin_size_w
+ roi_start_w;
T hend = static_cast<T>(ph + 1) * bin_size_h
+ roi_start_h;
T wend = static_cast<T>(pw + 1) * bin_size_w
+ roi_start_w;
// Add roi offsets and clip to input boundaries
hstart = std::min(std::max(hstart, static_cast<T>(0.)),
static_cast<T>(height));
hend = std::min(std::max(hend, static_cast<T>(0.)),
static_cast<T>(height));
wstart = std::min(std::max(wstart, static_cast<T>(0.)),
static_cast<T>(width));
wend = std::min(std::max(wend, static_cast<T>(0.)),
static_cast<T>(width));
vhstart.push_back(hstart);
vwstart.push_back(wstart);
vhend.push_back(hend);
vwend.push_back(wend);
}
}
#pragma omp parallel for collapse(3)
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
for (int c = 0; c < output_dim_; ++c) {
const int pool_index = ph * pooled_width + pw;
const int out_idx = pool_index * output_dim_ + c;
const int in_chan_idx = (c * pooled_height + ph)
* pooled_width + pw;
T hstart = vhstart[pool_index];
T hend = vhend[pool_index];
T wstart = vwstart[pool_index];
T wend = vwend[pool_index];
bool is_empty = (hend <= hstart) || (wend <= wstart);
T out_sum = 0;
for (T h = hstart; h < hend; h += 1.) {
for (T w = wstart; w < wend; w += 1.) {
// Selecting four regular locations for bilinear interpolation
int x_left = std::floor(w);
int x_right = std::ceil(w);
int y_bottom = std::floor(h);
int y_top = std::ceil(h);
int top_left_index = (y_top * width + x_left)
* channels + in_chan_idx;
int top_right_index = (y_top * width + x_right)
* channels + in_chan_idx;
int bottom_left_index = (y_bottom * width + x_left)
* channels + in_chan_idx;
int bottom_right_index = (y_bottom * width + x_right)
* channels + in_chan_idx;
bool is_top_left_in = x_left >= 0 && x_left <= width - 1
&& y_top >= 0 && y_top <= height - 1;
bool is_top_right_in = x_right >= 0 && x_right <= width - 1
&& y_top >= 0 && y_top <= height - 1;
bool is_bottom_left_in = x_left >= 0 && x_left <= width - 1
&& y_bottom >= 0 && y_bottom <= height - 1;
bool is_bottom_right_in = x_right >= 0 && x_right <= width - 1
&& y_bottom >= 0 && y_bottom <= height - 1;
if (is_top_left_in) {
out_sum += (1 - w + x_left) * (1 - y_top + h)
* batch_data[top_left_index];
}
if (is_top_right_in) {
out_sum += (1 - x_right + w) * (1 - y_top + h)
* batch_data[top_right_index];
}
if (is_bottom_left_in) {
out_sum += (1 - w + x_left) * (1 - h + y_bottom)
* batch_data[bottom_left_index];
}
if (is_bottom_right_in) {
out_sum += (1 - x_right + w) * (1 - h + y_bottom)
* batch_data[bottom_right_index];
}
}
}
T bin_area = (hend - hstart) * (wend - wstart);
output_ptr[out_idx] = is_empty ? 0. : out_sum / bin_area;
}
}
}
// Increment ROI data pointer
rois_ptr += 5;
output_ptr += pooled_height * pooled_width * output_dim_;
}
}
const T spatial_scale_;
const int output_dim_;
const int group_size_;
};
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_PSROI_ALIGN_H_
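Two details of the kernel above are worth spelling out: each output bin (ph, pw, c) reads from input channel (c * group_size + ph) * group_size + pw, and every fractional sample point inside a bin is bilinearly interpolated from its four surrounding pixels, each weighted by (1 - distance) along x and y. A minimal sketch of that per-sample lookup on an NHWC feature map, mirroring the weight terms used in the functor:

// Sketch: one bilinear sample at fractional position (h, w) from an
// NHWC feature map, summing the four corner contributions as the
// kernel above does.
#include <cmath>

float BilinearSample(const float *data, int height, int width, int channels,
                     float h, float w, int chan_idx) {
  const int x_left = static_cast<int>(std::floor(w));
  const int x_right = static_cast<int>(std::ceil(w));
  const int y_bottom = static_cast<int>(std::floor(h));
  const int y_top = static_cast<int>(std::ceil(h));
  auto in_bounds = [&](int x, int y) {
    return x >= 0 && x <= width - 1 && y >= 0 && y <= height - 1;
  };
  float sum = 0.f;
  // Each corner contributes proportionally to (1 - distance) in x and y.
  if (in_bounds(x_left, y_top))
    sum += (1 - w + x_left) * (1 - y_top + h) *
           data[(y_top * width + x_left) * channels + chan_idx];
  if (in_bounds(x_right, y_top))
    sum += (1 - x_right + w) * (1 - y_top + h) *
           data[(y_top * width + x_right) * channels + chan_idx];
  if (in_bounds(x_left, y_bottom))
    sum += (1 - w + x_left) * (1 - h + y_bottom) *
           data[(y_bottom * width + x_left) * channels + chan_idx];
  if (in_bounds(x_right, y_bottom))
    sum += (1 - x_right + w) * (1 - h + y_bottom) *
           data[(y_bottom * width + x_right) * channels + chan_idx];
  return sum;
}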
......@@ -4,6 +4,7 @@
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include <fstream>
namespace mace {
namespace ops {
......@@ -53,6 +54,7 @@ TEST_F(ProposalOpTest, CPUSimple) {
auto expected_tensor = CreateTensor<float>({1, 1, 1, 5}, {0, 0, 0, 255, 255});
ExpectTensorNear<float>(*expected_tensor, *net.GetTensor("Output"), 1e-5);
}
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/psroi_align.h"
namespace mace {
namespace ops {
void Register_PSROIAlign(OperatorRegistry *op_registry) {
REGISTER_OPERATOR(op_registry, OpKeyBuilder("PSROIAlign")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
PSROIAlignOp<DeviceType::CPU, float>);
}
} // namespace ops
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_PSROI_ALIGN_H_
#define MACE_OPS_PSROI_ALIGN_H_
#include "mace/core/operator.h"
#include "mace/kernels/psroi_align.h"
namespace mace {
namespace ops {
template <DeviceType D, class T>
class PSROIAlignOp : public Operator<D, T> {
public:
PSROIAlignOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetSingleArgument<T>("spatial_scale", 0),
OperatorBase::GetSingleArgument<int>("output_dim", 0),
OperatorBase::GetSingleArgument<int>("group_size", 0)) {}
bool Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *rois = this->Input(ROIS);
Tensor *output = this->Output(OUTPUT);
functor_(input, rois, output, future);
return true;
}
private:
kernels::PSROIAlignFunctor<D, T> functor_;
protected:
OP_INPUT_TAGS(INPUT, ROIS);
OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_PSROI_ALIGN_H_
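No unit test for the new op is included in this commit. A rough sketch of what one could look like, modeled on the Proposal test above and assuming the OpsTestNet/OpDefBuilder helpers from ops_test_util.h (the builder method names and the shapes below are assumptions, not taken from this diff):

// Hypothetical test sketch, not part of this commit.
class PSROIAlignOpTest : public OpsTestBase {};

TEST_F(PSROIAlignOpTest, CPUSimple) {
  OpsTestNet net;
  OpDefBuilder("PSROIAlign", "PSROIAlignTest")
      .Input("Input")
      .Input("ROIs")
      .Output("Output")
      .AddFloatArg("spatial_scale", 1.0f / 16)  // assumed builder helpers
      .AddIntArg("output_dim", 2)
      .AddIntArg("group_size", 2)
      .Finalize(net.NewOperatorDef());

  // Input: 1x4x4x8 NHWC feature map (8 = output_dim * group_size^2);
  // ROIs: one row of {batch_index, x1, y1, x2, y2} in image coordinates.
  net.AddInputFromArray<DeviceType::CPU, float>(
      "Input", {1, 4, 4, 8}, std::vector<float>(4 * 4 * 8, 1.0f));
  net.AddInputFromArray<DeviceType::CPU, float>(
      "ROIs", {1, 1, 1, 5}, {0, 0, 0, 48, 48});

  net.RunOp();

  // The output shape should be {num_rois, group_size, group_size, output_dim}.
}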