/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include #include #include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" #include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/funcs/gather.cu.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { namespace operators { namespace { template static std::pair ProposalForOneImage( const phi::GPUContext &ctx, const phi::DenseTensor &im_info, const phi::DenseTensor &anchors, const phi::DenseTensor &variances, const phi::DenseTensor &bbox_deltas, // [M, 4] const phi::DenseTensor &scores, // [N, 1] int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size, float eta) { // 1. pre nms phi::DenseTensor scores_sort, index_sort; SortDescending(ctx, scores, &scores_sort, &index_sort); int num = scores.numel(); int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel() : pre_nms_top_n; scores_sort.Resize({pre_nms_num, 1}); index_sort.Resize({pre_nms_num, 1}); // 2. box decode and clipping phi::DenseTensor proposals; proposals.mutable_data({pre_nms_num, 4}, ctx.GetPlace()); { platform::ForRange for_range(ctx, pre_nms_num); for_range(BoxDecodeAndClipFunctor{anchors.data(), bbox_deltas.data(), variances.data(), index_sort.data(), im_info.data(), proposals.data()}); } // 3. filter phi::DenseTensor keep_index, keep_num_t; keep_index.mutable_data({pre_nms_num}, ctx.GetPlace()); keep_num_t.mutable_data({1}, ctx.GetPlace()); min_size = std::max(min_size, 1.0f); auto stream = ctx.stream(); FilterBBoxes<<<1, 512, 0, stream>>>(proposals.data(), im_info.data(), min_size, pre_nms_num, keep_num_t.data(), keep_index.data()); int keep_num; const auto gpu_place = ctx.GetPlace(); memory::Copy(platform::CPUPlace(), &keep_num, gpu_place, keep_num_t.data(), sizeof(int), ctx.stream()); ctx.Wait(); keep_index.Resize({keep_num}); phi::DenseTensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { phi::funcs::SetConstant set_zero; proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &proposals_filter, static_cast(0)); set_zero(ctx, &scores_filter, static_cast(0)); return std::make_pair(proposals_filter, scores_filter); } proposals_filter.mutable_data({keep_num, 4}, ctx.GetPlace()); scores_filter.mutable_data({keep_num, 1}, ctx.GetPlace()); phi::funcs::GPUGather(ctx, proposals, keep_index, &proposals_filter); phi::funcs::GPUGather(ctx, scores_sort, keep_index, &scores_filter); if (nms_thresh <= 0) { return std::make_pair(proposals_filter, scores_filter); } // 4. nms phi::DenseTensor keep_nms; NMS(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms); if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { keep_nms.Resize({post_nms_top_n}); } phi::DenseTensor scores_nms, proposals_nms; proposals_nms.mutable_data({keep_nms.numel(), 4}, ctx.GetPlace()); scores_nms.mutable_data({keep_nms.numel(), 1}, ctx.GetPlace()); phi::funcs::GPUGather(ctx, proposals_filter, keep_nms, &proposals_nms); phi::funcs::GPUGather(ctx, scores_filter, keep_nms, &scores_nms); return std::make_pair(proposals_nms, scores_nms); } } // namespace template class CUDAGenerateProposalsKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *scores = context.Input("Scores"); auto *bbox_deltas = context.Input("BboxDeltas"); auto *im_info = context.Input("ImInfo"); auto anchors = GET_DATA_SAFELY(context.Input("Anchors"), "Input", "Anchors", "GenerateProposals"); auto variances = GET_DATA_SAFELY(context.Input("Variances"), "Input", "Variances", "GenerateProposals"); auto *rpn_rois = context.Output("RpnRois"); auto *rpn_roi_probs = context.Output("RpnRoiProbs"); int pre_nms_top_n = context.Attr("pre_nms_topN"); int post_nms_top_n = context.Attr("post_nms_topN"); float nms_thresh = context.Attr("nms_thresh"); float min_size = context.Attr("min_size"); float eta = context.Attr("eta"); PADDLE_ENFORCE_GE(eta, 1., platform::errors::InvalidArgument( "Not support adaptive NMS. The attribute 'eta' " "should not less than 1. But received eta=[%d]", eta)); auto &dev_ctx = context.template device_context(); auto scores_dim = scores->dims(); int64_t num = scores_dim[0]; int64_t c_score = scores_dim[1]; int64_t h_score = scores_dim[2]; int64_t w_score = scores_dim[3]; auto bbox_dim = bbox_deltas->dims(); int64_t c_bbox = bbox_dim[1]; int64_t h_bbox = bbox_dim[2]; int64_t w_bbox = bbox_dim[3]; phi::DenseTensor bbox_deltas_swap, scores_swap; bbox_deltas_swap.mutable_data({num, h_bbox, w_bbox, c_bbox}, dev_ctx.GetPlace()); scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); phi::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); anchors.Resize({anchors.numel() / 4, 4}); variances.Resize({variances.numel() / 4, 4}); rpn_rois->mutable_data({bbox_deltas->numel() / 4, 4}, context.GetPlace()); rpn_roi_probs->mutable_data({scores->numel(), 1}, context.GetPlace()); T *rpn_rois_data = rpn_rois->data(); T *rpn_roi_probs_data = rpn_roi_probs->data(); auto place = dev_ctx.GetPlace(); auto cpu_place = platform::CPUPlace(); int64_t num_proposals = 0; std::vector offset(1, 0); std::vector tmp_num; for (int64_t i = 0; i < num; ++i) { phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1); bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4}); scores_slice.Resize({h_score * w_score * c_score, 1}); std::pair box_score_pair = ProposalForOneImage(dev_ctx, im_info_slice, anchors, variances, bbox_deltas_slice, scores_slice, pre_nms_top_n, post_nms_top_n, nms_thresh, min_size, eta); phi::DenseTensor &proposals = box_score_pair.first; phi::DenseTensor &scores = box_score_pair.second; memory::Copy(place, rpn_rois_data + num_proposals * 4, place, proposals.data(), sizeof(T) * proposals.numel(), dev_ctx.stream()); memory::Copy(place, rpn_roi_probs_data + num_proposals, place, scores.data(), sizeof(T) * scores.numel(), dev_ctx.stream()); num_proposals += proposals.dims()[0]; offset.emplace_back(num_proposals); tmp_num.push_back(proposals.dims()[0]); } if (context.HasOutput("RpnRoisNum")) { auto *rpn_rois_num = context.Output("RpnRoisNum"); rpn_rois_num->mutable_data({num}, context.GetPlace()); int *num_data = rpn_rois_num->data(); memory::Copy(place, num_data, cpu_place, &tmp_num[0], sizeof(int) * num, dev_ctx.stream()); rpn_rois_num->Resize({num}); } framework::LoD lod; lod.emplace_back(offset); rpn_rois->set_lod(lod); rpn_roi_probs->set_lod(lod); rpn_rois->Resize({num_proposals, 4}); rpn_roi_probs->Resize({num_proposals, 1}); } }; } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( generate_proposals, ops::CUDAGenerateProposalsKernel);