// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/generate_proposals_compute.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

// Upper bound for the exponent used in BoxCoder: log(1000 / 16).
static const double kBBoxClipDefault = std::log(1000.0 / 16.0);

// Transposes a 4-D float tensor according to `orders` (e.g. NCHW -> NHWC).
static void permute(const Tensor &input,
                    Tensor *output,
                    const std::vector<int> &orders) {
  auto in_dims = input.dims();
  auto out_dims = output->dims();
  int num_axes = in_dims.size();
  int count = in_dims.production();
  const float *din = input.data<float>();
  float *dout = output->mutable_data<float>();
  std::vector<int> old_steps(
      {static_cast<int>(in_dims[1] * in_dims[2] * in_dims[3]),
       static_cast<int>(in_dims[2] * in_dims[3]),
       static_cast<int>(in_dims[3]),
       1});
  std::vector<int> new_steps(
      {static_cast<int>(out_dims[1] * out_dims[2] * out_dims[3]),
       static_cast<int>(out_dims[2] * out_dims[3]),
       static_cast<int>(out_dims[3]),
       1});

  for (int i = 0; i < count; ++i) {
    int old_idx = 0;
    int idx = i;
    for (int j = 0; j < num_axes; ++j) {
      int order = orders[j];
      old_idx += (idx / new_steps[j]) * old_steps[order];
      idx %= new_steps[j];
    }
    dout[i] = din[old_idx];
  }
}

// Copies the rows of `src` selected by `index` into `output`.
template <typename T, typename IndexT = int>
static void gather(const Tensor &src, const Tensor &index, Tensor *output) {
  auto *p_src = src.data<T>();
  auto *p_index = index.data<IndexT>();
  auto *p_output = output->mutable_data<T>();

  auto src_dims = src.dims();
  int slice_size = 1;
  for (int i = 1; i < src_dims.size(); i++) slice_size *= src_dims[i];
  size_t slice_bytes = slice_size * sizeof(T);

  int64_t index_size = index.numel();
  for (int64_t i = 0; i < index_size; i++) {
    IndexT index_ = p_index[i];
    memcpy(p_output + i * slice_size, p_src + index_ * slice_size, slice_bytes);
  }
}

// Decodes bbox deltas (optionally scaled by variances) relative to anchors
// into proposal boxes in [xmin, ymin, xmax, ymax] form.
template <typename T>
static void BoxCoder(Tensor *all_anchors,
                     Tensor *bbox_deltas,
                     Tensor *variances,
                     Tensor *proposals) {
  T *proposals_data = proposals->mutable_data<T>();
  int64_t row = all_anchors->dims()[0];
  int64_t len = all_anchors->dims()[1];

  auto *bbox_deltas_data = bbox_deltas->data<T>();
  auto *anchor_data = all_anchors->data<T>();
  const T *variances_data = nullptr;
  if (variances) {
    variances_data = variances->data<T>();
  }

  for (int64_t i = 0; i < row; ++i) {
    T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0;
    T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0;

    T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width;
    T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height;

    T bbox_center_x = 0, bbox_center_y = 0;
    T bbox_width = 0, bbox_height = 0;

    if (variances) {
      bbox_center_x =
          variances_data[i * len] * bbox_deltas_data[i * len] * anchor_width +
          anchor_center_x;
      bbox_center_y = variances_data[i * len + 1] *
                          bbox_deltas_data[i * len + 1] * anchor_height +
                      anchor_center_y;
      bbox_width = std::exp(std::min<T>(variances_data[i * len + 2] *
                                            bbox_deltas_data[i * len + 2],
                                        kBBoxClipDefault)) *
                   anchor_width;
      bbox_height = std::exp(std::min<T>(variances_data[i * len + 3] *
                                             bbox_deltas_data[i * len + 3],
                                         kBBoxClipDefault)) *
                    anchor_height;
    } else {
      bbox_center_x =
          bbox_deltas_data[i * len] * anchor_width + anchor_center_x;
      bbox_center_y =
          bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y;
      bbox_width = std::exp(std::min<T>(bbox_deltas_data[i * len + 2],
                                        kBBoxClipDefault)) *
                   anchor_width;
      bbox_height = std::exp(std::min<T>(bbox_deltas_data[i * len + 3],
                                         kBBoxClipDefault)) *
                    anchor_height;
    }

    proposals_data[i * len] = bbox_center_x - bbox_width / 2;
    proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2;
    proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1;
    proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1;
  }
}

// Clips boxes to the image boundary given by im_info ([height, width, scale]).
template <typename T>
static void ClipTiledBoxes(const Tensor &im_info, Tensor *boxes) {
  T *boxes_data = boxes->mutable_data<T>();
  const T *im_info_data = im_info.data<T>();
  T zero(0);
  for (int64_t i = 0; i < boxes->numel(); ++i) {
    if (i % 4 == 0) {  // xmin
      boxes_data[i] =
          std::max(std::min(boxes_data[i], im_info_data[1] - 1), zero);
    } else if (i % 4 == 1) {  // ymin
      boxes_data[i] =
          std::max(std::min(boxes_data[i], im_info_data[0] - 1), zero);
    } else if (i % 4 == 2) {  // xmax
      boxes_data[i] =
          std::max(std::min(boxes_data[i], im_info_data[1] - 1), zero);
    } else {  // ymax
      boxes_data[i] =
          std::max(std::min(boxes_data[i], im_info_data[0] - 1), zero);
    }
  }
}

// Keeps the indices of boxes whose size in the original image scale is at
// least `min_size` and whose center lies inside the image.
template <typename T>
static void FilterBoxes(Tensor *boxes,
                        float min_size,
                        const Tensor &im_info,
                        Tensor *keep) {
  T *boxes_data = boxes->mutable_data<T>();
  const T *im_info_data = im_info.data<T>();
  T im_scale = im_info_data[2];
  min_size = std::max(min_size, 1.0f);
  keep->Resize(std::vector<int64_t>({boxes->dims()[0]}));
  int *keep_data = keep->mutable_data<int>();

  int keep_len = 0;
  for (int i = 0; i < boxes->dims()[0]; ++i) {
    T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1;
    T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1;
    T ws_origin_scale =
        (boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_scale + 1;
    T hs_origin_scale =
        (boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_scale + 1;
    T x_ctr = boxes_data[4 * i] + ws / 2;
    T y_ctr = boxes_data[4 * i + 1] + hs / 2;
    if (ws_origin_scale >= min_size && hs_origin_scale >= min_size &&
        x_ctr <= im_info_data[1] && y_ctr <= im_info_data[0]) {
      keep_data[keep_len++] = i;
    }
  }
  keep->Resize(std::vector<int64_t>({keep_len}));
}

// Pairs each score with its index and sorts the pairs by score in ascending
// order, so the highest-scoring index ends up at the back of the vector.
template <class T>
static std::vector<std::pair<T, int>> GetSortedScoreIndex(
    const std::vector<T> &scores) {
  std::vector<std::pair<T, int>> sorted_indices;
  sorted_indices.reserve(scores.size());
  for (size_t i = 0; i < scores.size(); ++i) {
    sorted_indices.emplace_back(scores[i], i);
  }
  std::stable_sort(sorted_indices.begin(),
                   sorted_indices.end(),
                   [](const std::pair<T, int> &a, const std::pair<T, int> &b) {
                     return a.first < b.first;
                   });
  return sorted_indices;
}

// Area of a box; +1 in each dimension when coordinates are pixel indices.
template <class T>
static T BBoxArea(const T *box, bool normalized) {
  if (box[2] < box[0] || box[3] < box[1]) {
    // If coordinate values are invalid
    // (e.g. xmax < xmin or ymax < ymin), return 0.
    return static_cast<T>(0.);
  } else {
    const T w = box[2] - box[0];
    const T h = box[3] - box[1];
    if (normalized) {
      return w * h;
    } else {
      // If coordinate values are not within range [0, 1].
      return (w + 1) * (h + 1);
    }
  }
}

// Intersection-over-union of two boxes.
template <class T>
static T JaccardOverlap(const T *box1, const T *box2, bool normalized) {
  if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
      box2[3] < box1[1]) {
    return static_cast<T>(0.);
  } else {
    const T inter_xmin = std::max(box1[0], box2[0]);
    const T inter_ymin = std::max(box1[1], box2[1]);
    const T inter_xmax = std::min(box1[2], box2[2]);
    const T inter_ymax = std::min(box1[3], box2[3]);
    const T inter_w = std::max(T(0), inter_xmax - inter_xmin + 1);
    const T inter_h = std::max(T(0), inter_ymax - inter_ymin + 1);
    const T inter_area = inter_w * inter_h;
    const T bbox1_area = BBoxArea<T>(box1, normalized);
    const T bbox2_area = BBoxArea<T>(box2, normalized);
    return inter_area / (bbox1_area + bbox2_area - inter_area);
  }
}

// Copies the first `selected_num` indices into a 1-D tensor.
template <class T>
static Tensor VectorToTensor(const std::vector<T> &selected_indices,
                             int selected_num) {
  Tensor keep_nms;
  keep_nms.Resize(std::vector<int64_t>({selected_num}));
  auto *keep_data = keep_nms.mutable_data<T>();
  for (int i = 0; i < selected_num; ++i) {
    keep_data[i] = selected_indices[i];
  }
  return keep_nms;
}

// Greedy non-maximum suppression; returns the indices of the kept boxes.
template <class T>
static Tensor NMS(Tensor *bbox, Tensor *scores, T nms_threshold, float eta) {
  int64_t num_boxes = bbox->dims()[0];
  int64_t box_size = bbox->dims()[1];  // 4: [xmin ymin xmax ymax]

  std::vector<T> scores_data(num_boxes);
  std::copy_n(scores->data<T>(), num_boxes, scores_data.begin());
  std::vector<std::pair<T, int>> sorted_indices =
      GetSortedScoreIndex<T>(scores_data);

  std::vector<int> selected_indices;
  int selected_num = 0;
  T adaptive_threshold = nms_threshold;
  const T *bbox_data = bbox->data<T>();
  while (sorted_indices.size() != 0) {
    int idx = sorted_indices.back().second;
    bool flag = true;
    for (int kept_idx : selected_indices) {
      if (flag) {
        T overlap = JaccardOverlap<T>(
            bbox_data + idx * box_size, bbox_data + kept_idx * box_size, false);
        flag = (overlap <= adaptive_threshold);
      } else {
        break;
      }
    }
    if (flag) {
      selected_indices.push_back(idx);
      ++selected_num;
    }
    sorted_indices.erase(sorted_indices.end() - 1);
    if (flag && eta < 1 && adaptive_threshold > 0.5) {
      adaptive_threshold *= eta;
    }
  }
  return VectorToTensor<int>(selected_indices, selected_num);
}

// Generates proposals for a single image: keep the top pre_nms_top_n scored
// anchors, decode and clip their boxes, drop boxes smaller than min_size,
// then run NMS and keep at most post_nms_top_n proposals.
static std::pair<Tensor, Tensor> ProposalForOneImage(
    const Tensor &im_info_slice,
    const Tensor &anchors,
    const Tensor &variances,          // H * W * A * 4
    const Tensor &bbox_deltas_slice,  // [A, 4]
    const Tensor &scores_slice,       // [A, 1]
    int pre_nms_top_n,
    int post_nms_top_n,
    float nms_thresh,
    float min_size,
    float eta) {
  // sort scores_slice
  Tensor index_t;
  index_t.Resize(std::vector<int64_t>({scores_slice.numel()}));
  auto *index = index_t.mutable_data<int>();
  for (int i = 0; i < index_t.numel(); i++) {
    index[i] = i;
  }
  auto *scores_data = scores_slice.data<float>();
  auto compare_func = [scores_data](const int64_t &i, const int64_t &j) {
    return scores_data[i] > scores_data[j];
  };
  if (pre_nms_top_n <= 0 || pre_nms_top_n >= scores_slice.numel()) {
    std::stable_sort(index, index + scores_slice.numel(), compare_func);
  } else {
    std::nth_element(index,
                     index + pre_nms_top_n,
                     index + scores_slice.numel(),
                     compare_func);
    index_t.Resize({pre_nms_top_n});
  }

  Tensor scores_sel, bbox_sel, anchor_sel, var_sel;
  scores_sel.Resize(std::vector<int64_t>({index_t.numel(), 1}));
  bbox_sel.Resize(std::vector<int64_t>({index_t.numel(), 4}));
  anchor_sel.Resize(std::vector<int64_t>({index_t.numel(), 4}));
  var_sel.Resize(std::vector<int64_t>({index_t.numel(), 4}));
  gather<float>(scores_slice, index_t, &scores_sel);
  gather<float>(bbox_deltas_slice, index_t, &bbox_sel);
  gather<float>(anchors, index_t, &anchor_sel);
  gather<float>(variances, index_t, &var_sel);

  Tensor proposals;
  proposals.Resize(std::vector<int64_t>({index_t.numel(), 4}));
  BoxCoder<float>(&anchor_sel, &bbox_sel, &var_sel,
                  &proposals);
  ClipTiledBoxes<float>(im_info_slice, &proposals);

  Tensor keep;
  FilterBoxes<float>(&proposals, min_size, im_info_slice, &keep);

  Tensor scores_filter;
  scores_filter.Resize(std::vector<int64_t>({keep.numel(), 1}));
  bbox_sel.Resize(std::vector<int64_t>({keep.numel(), 4}));
  gather<float>(scores_sel, keep, &scores_filter);
  gather<float>(proposals, keep, &bbox_sel);
  if (nms_thresh <= 0) {
    return std::make_pair(bbox_sel, scores_filter);
  }

  Tensor keep_nms = NMS<float>(&bbox_sel, &scores_filter, nms_thresh, eta);
  if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
    keep_nms.Resize(std::vector<int64_t>({post_nms_top_n}));
  }

  proposals.Resize(std::vector<int64_t>({keep_nms.numel(), 4}));
  scores_sel.Resize(std::vector<int64_t>({keep_nms.numel(), 1}));
  gather<float>(bbox_sel, keep_nms, &proposals);
  gather<float>(scores_filter, keep_nms, &scores_sel);
  return std::make_pair(proposals, scores_sel);
}

// Appends the float data of `src` to `dst` starting at element `offset`.
void AppendTensor(Tensor *dst, int64_t offset, const Tensor &src) {
  auto *out_data = static_cast<void *>(dst->mutable_data<float>());
  auto *to_add_data = static_cast<const void *>(src.data<float>());
  size_t size_of_t = sizeof(float);
  offset *= size_of_t;
  std::memcpy(
      reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_data) + offset),
      to_add_data,
      src.numel() * size_of_t);
}

void GenerateProposalsCompute::Run() {
  auto &ctx = this->ctx_->template As<ARMContext>();
  auto &param = Param<operators::GenerateProposalsParam>();
  auto *scores = param.Scores;              // N * A * H * W
  auto *bbox_deltas = param.BboxDeltas;     // N * 4A * H * W
  auto *im_info = param.ImInfo;             // N * 3
  auto *anchors = param.Anchors;            // H * W * A * 4
  auto *variances = param.Variances;        // H * W * A * 4
  auto *rpn_rois = param.RpnRois;           // A * 4
  auto *rpn_roi_probs = param.RpnRoiProbs;  // A * 1
  int pre_nms_top_n = param.pre_nms_topN;
  int post_nms_top_n = param.post_nms_topN;
  float nms_thresh = param.nms_thresh;
  float min_size = param.min_size;
  float eta = param.eta;

  auto &scores_dim = scores->dims();
  int64_t num = scores_dim[0];
  int64_t c_score = scores_dim[1];
  int64_t h_score = scores_dim[2];
  int64_t w_score = scores_dim[3];
  auto &bbox_dim = bbox_deltas->dims();
  int64_t c_bbox = bbox_dim[1];
  int64_t h_bbox = bbox_dim[2];
  int64_t w_bbox = bbox_dim[3];

  rpn_rois->Resize({scores->numel(), 4});
  rpn_roi_probs->Resize(std::vector<int64_t>({scores->numel(), 1}));

  // Transpose scores and bbox deltas from NCHW to NHWC so that each row of
  // the reshaped tensors corresponds to one anchor.
  Tensor bbox_deltas_swap, scores_swap;
  scores_swap.Resize(std::vector<int64_t>({num, h_score, w_score, c_score}));
  bbox_deltas_swap.Resize(std::vector<int64_t>({num, h_bbox, w_bbox, c_bbox}));
  std::vector<int> orders({0, 2, 3, 1});
  permute(*scores, &scores_swap, orders);
  permute(*bbox_deltas, &bbox_deltas_swap, orders);

  LoD lod;
  lod.resize(1);
  auto &lod0 = lod[0];
  lod0.push_back(0);
  anchors->Resize(std::vector<int64_t>({anchors->numel() / 4, 4}));
  variances->Resize(std::vector<int64_t>({variances->numel() / 4, 4}));

  int64_t num_proposals = 0;
  for (int64_t i = 0; i < num; ++i) {
    Tensor im_info_slice = im_info->Slice<float>(i, i + 1);
    Tensor bbox_deltas_slice = bbox_deltas_swap.Slice<float>(i, i + 1);
    Tensor scores_slice = scores_swap.Slice<float>(i, i + 1);
    bbox_deltas_slice.Resize(
        std::vector<int64_t>({c_bbox * h_bbox * w_bbox / 4, 4}));
    scores_slice.Resize(std::vector<int64_t>({c_score * h_score * w_score, 1}));

    std::pair<Tensor, Tensor> tensor_pair =
        ProposalForOneImage(im_info_slice,
                            *anchors,
                            *variances,
                            bbox_deltas_slice,
                            scores_slice,
                            pre_nms_top_n,
                            post_nms_top_n,
                            nms_thresh,
                            min_size,
                            eta);
    Tensor &proposals = tensor_pair.first;
    Tensor &scores = tensor_pair.second;

    AppendTensor(rpn_rois, 4 * num_proposals, proposals);
    AppendTensor(rpn_roi_probs, num_proposals, scores);
    num_proposals += proposals.dims()[0];
    lod0.push_back(num_proposals);
  }
  rpn_rois->set_lod(lod);
  rpn_roi_probs->set_lod(lod);
  rpn_rois->Resize({num_proposals, 4});
  rpn_roi_probs->Resize({num_proposals, 1});
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(generate_proposals,
                     kARM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::arm::GenerateProposalsCompute,
                     def)
    .BindInput("Scores", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("BboxDeltas", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("ImInfo", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Anchors", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Variances", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("RpnRois", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("RpnRoiProbs", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();