diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index df961be911537582fb60dd258fcdd4a5dd41a38e..8e571fce216c0e64fc1bfef5028e448a1d748df5 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -327,6 +327,7 @@ paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], vararg paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)) +paddle.fluid.layers.distribute_fpn_proposals ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index f6fbe97565c43c306ea885c765c0a665492fa317..8088647856941a34e22fa89205fa3ca3420aceb0 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -33,6 +33,7 @@ detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) +detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6d36876efd747d9e6f90c0d0200a9e9610a5318c --- /dev/null +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc @@ -0,0 +1,93 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" + +namespace paddle { +namespace operators { + +class DistributeFpnProposalsOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("FpnRois"), + "Input(FpnRois) shouldn't be null"); + PADDLE_ENFORCE_GE( + ctx->Outputs("MultiFpnRois").size(), 1UL, + "Outputs(MultiFpnRois) of DistributeOp should not be empty"); + size_t min_level = static_cast(ctx->Attrs().Get("min_level")); + size_t max_level = static_cast(ctx->Attrs().Get("max_level")); + PADDLE_ENFORCE_GE(max_level, min_level, + "max_level must not lower than min_level"); + // Set the output shape + size_t num_out_rois = max_level - min_level + 1; + std::vector outs_dims; + outs_dims.reserve(num_out_rois); + for (size_t i = 0; i < num_out_rois; ++i) { + framework::DDim out_dim = {-1, 4}; + outs_dims.push_back(out_dim); + } + ctx->SetOutputsDim("MultiFpnRois", outs_dims); + ctx->SetOutputDim("RestoreIndex", {1, -1}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("FpnRois")); + return framework::OpKernelType(data_type, platform::CPUPlace()); + } +}; + +class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("FpnRois", "(LoDTensor) The rois at all levels in shape (-1, 4)"); + AddOutput("MultiFpnRois", "(LoDTensor) Output with distribute operator") + .AsDuplicable(); + AddOutput("RestoreIndex", + "(Tensor) An array of positive number which is " + "used to restore the order of FpnRois"); + AddAttr("min_level", + "The lowest level of FPN layer where the" + " proposals come from"); + AddAttr("max_level", + "The highest level of FPN layer where the" + " proposals come from"); + AddAttr("refer_level", + "The referring level of FPN layer with" + " specified scale"); + AddAttr("refer_scale", + "The referring scale of FPN layer with" + " specified level"); + AddComment(R"DOC( +This operator distribute all proposals into different fpn level, + with respect to scale of the proposals, the referring scale and + the referring level. Besides, to restore the order of proposals, +we return an array which indicate the original index of rois in + current proposals. +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(distribute_fpn_proposals, ops::DistributeFpnProposalsOp, + ops::DistributeFpnProposalsOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(distribute_fpn_proposals, + ops::DistributeFpnProposalsOpKernel, + ops::DistributeFpnProposalsOpKernel); diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7c852934b5a5abe85f9717b77501ff8bc02c9135 --- /dev/null +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detail/safe_ref.h" +#include "paddle/fluid/operators/gather.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +const int kBoxDim = 4; + +template +static inline T BBoxArea(const T* box, bool normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return static_cast(0.); + } else { + const T w = box[2] - box[0]; + const T h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +template +class DistributeFpnProposalsOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* fpn_rois = context.Input("FpnRois"); + + auto multi_fpn_rois = + context.MultiOutput("MultiFpnRois"); + + auto* restore_index = + context.Output("RestoreIndex"); + + const int min_level = context.Attr("min_level"); + const int max_level = context.Attr("max_level"); + const int refer_level = context.Attr("refer_level"); + const int refer_scale = context.Attr("refer_scale"); + const int num_level = max_level - min_level + 1; + + // check that the fpn_rois is not empty + PADDLE_ENFORCE_EQ(fpn_rois->lod().size(), 1UL, + "DistributeFpnProposalsOp need 1 level of LoD"); + + auto fpn_rois_lod = fpn_rois->lod().back(); + int fpn_rois_num = fpn_rois_lod[fpn_rois_lod.size() - 1]; + std::vector target_level; + // std::vector target_level(fpn_rois_num, -1); + // record the number of rois in each level + std::vector num_rois_level(num_level, 0); + std::vector num_rois_level_integral(num_level + 1, 0); + for (int i = 0; i < fpn_rois_lod.size() - 1; ++i) { + Tensor fpn_rois_slice = + fpn_rois->Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]); + const T* rois_data = fpn_rois_slice.data(); + for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { + // get the target level of current rois + T roi_scale = std::sqrt(BBoxArea(rois_data, false)); + int tgt_lvl = + std::floor(std::log2(roi_scale / refer_scale) + refer_level); + tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level)); + target_level.push_back(tgt_lvl); + num_rois_level[tgt_lvl - min_level]++; + rois_data += kBoxDim; + } + } + // define the output rois + // pointer which point to each level fpn rois + T* multi_fpn_rois_data[num_level]; + // lod0 which will record the offset information of each level rois + std::vector> multi_fpn_rois_lod0; + for (int i = 0; i < num_level; ++i) { + // allocate memory for each level rois + multi_fpn_rois[i]->mutable_data({num_rois_level[i], kBoxDim}, + context.GetPlace()); + multi_fpn_rois_data[i] = multi_fpn_rois[i]->data(); + std::vector lod0(1, 0); + multi_fpn_rois_lod0.push_back(lod0); + // statistic start point for each level rois + num_rois_level_integral[i + 1] = + num_rois_level_integral[i] + num_rois_level[i]; + } + restore_index->mutable_data({1, fpn_rois_num}, context.GetPlace()); + int* restore_index_data = restore_index->data(); + std::vector restore_index_inter(fpn_rois_num, -1); + // distribute the rois into different fpn level by target level + for (int i = 0; i < fpn_rois_lod.size() - 1; ++i) { + Tensor fpn_rois_slice = + fpn_rois->Slice(fpn_rois_lod[i], fpn_rois_lod[i + 1]); + const T* rois_data = fpn_rois_slice.data(); + size_t cur_offset = fpn_rois_lod[i]; + // std::vector lod_offset[num_level]; + for (int j = 0; j < num_level; j++) { + multi_fpn_rois_lod0[j].push_back(multi_fpn_rois_lod0[j][i]); + } + for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) { + int lvl = target_level[cur_offset + j]; + memcpy(multi_fpn_rois_data[lvl - min_level], rois_data, + kBoxDim * sizeof(T)); + multi_fpn_rois_data[lvl - min_level] += kBoxDim; + int index_in_shuffle = num_rois_level_integral[lvl - min_level] + + multi_fpn_rois_lod0[lvl - min_level][i + 1]; + restore_index_inter[index_in_shuffle] = cur_offset + j; + multi_fpn_rois_lod0[lvl - min_level][i + 1]++; + rois_data += kBoxDim; + } + } + for (int i = 0; i < fpn_rois_num; ++i) { + restore_index_data[restore_index_inter[i]] = i; + } + // merge lod information into LoDTensor + for (int i = 0; i < num_level; ++i) { + framework::LoD lod; + lod.emplace_back(multi_fpn_rois_lod0[i]); + multi_fpn_rois[i]->set_lod(lod); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 3b43ae0b9cb63a9f4708a680cb1021d74c197550..2151f32e7e6d0fa02fb012a206157dd98e7dca82 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -51,6 +51,7 @@ __all__ = [ 'yolov3_loss', 'box_clip', 'multiclass_nms', + 'distribute_fpn_proposals', ] @@ -2220,3 +2221,77 @@ def multiclass_nms(bboxes, output.stop_gradient = True return output + + +def distribute_fpn_proposals(fpn_rois, + min_level, + max_level, + refer_level, + refer_scale, + name=None): + """ + Distribute all proposals into different fpn level, with respect to scale + of the proposals, the referring scale and the referring level. Besides, to + restore the order of proposals, we return an array which indicate the + original index of rois in current proposals. To compute fpn level for each + roi, the formula is given as follows: + + .. code-block:: text + + roi_scale = sqrt(BBoxArea(fpn_roi)); + level = floor(log2(roi_scale / refer_scale) + refer_level) + + where BBoxArea is the function to compute the area of each roi: + + .. code-block:: text + + w = fpn_roi[2] - fpn_roi[0] + h = fpn_roi[3] - fpn_roi[1] + area = (w + 1) * (h + 1) + + Args: + fpn_rois(variable): The input fpn_rois, the last dimension is 4. + min_level(int): The lowest level of FPN layer where the proposals come + from. + max_level(int): The highest level of FPN layer where the proposals + come from. + refer_level(int): The referring level of FPN layer with specified scale. + refer_scale(int): The referring scale of FPN layer with specified level. + + Returns: + List(variable): The list of segmented tensor variables. + Variable: An array of positive number which is used to restore the + order of fpn_rois. + + Examples: + .. code-block:: python + + fpn_rois = fluid.layers.data( + name='data', shape=[4], dtype='float32', lod_level=1) + multi_rois, restore_ind = fluid.layers.distribute_fpn_proposals( + fpn_rois=fpn_rois, + min_level=2, + max_level=5, + refer_level=4, + refer_scale=224) + """ + + helper = LayerHelper('distribute_fpn_proposals', **locals()) + dtype = helper.input_dtype() + num_lvl = max_level - min_level + 1 + multi_rois = [ + helper.create_variable_for_type_inference(dtype) for i in range(num_lvl) + ] + restore_ind = helper.create_variable_for_type_inference(dtype='int32') + helper.append_op( + type='distribute_fpn_proposals', + inputs={'FpnRois': fpn_rois}, + outputs={'MultiFpnRois': multi_rois, + 'RestoreIndex': restore_ind}, + attrs={ + 'min_level': min_level, + 'max_level': max_level, + 'refer_level': refer_level, + 'refer_scale': refer_scale + }) + return multi_rois, restore_ind diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 0d39a139eed87f900b1f59fd0569b6acaec0962b..6218db73459a2bb55d72545c738f88dbd8cce0f7 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -504,5 +504,21 @@ class TestMulticlassNMS(unittest.TestCase): self.assertIsNotNone(output) +class TestDistributeFpnProposals(unittest.TestCase): + def test_distribute_fpn_proposals(self): + program = Program() + with program_guard(program): + fpn_rois = fluid.layers.data( + name='data', shape=[4], dtype='float32', lod_level=1) + multi_rois, restore_ind = layers.distribute_fpn_proposals( + fpn_rois=fpn_rois, + min_level=2, + max_level=5, + refer_level=4, + refer_scale=224) + self.assertIsNotNone(multi_rois) + self.assertIsNotNone(restore_ind) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py new file mode 100644 index 0000000000000000000000000000000000000000..1464060f5961aff7fe513ae9edb2cd974bffb316 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py @@ -0,0 +1,117 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import math +import sys +from op_test import OpTest + + +class TestDistributeFPNProposalsOp(OpTest): + def set_data(self): + self.init_test_case() + self.make_rois() + self.rois_fpn, self.rois_idx_restore = self.calc_rois_distribute() + self.inputs = {'FpnRois': (self.rois[:, 1:5], self.rois_lod)} + self.attrs = { + 'max_level': self.roi_max_level, + 'min_level': self.roi_min_level, + 'refer_scale': self.canonical_scale, + 'refer_level': self.canonical_level + } + output = [('out%d' % i, self.rois_fpn[i]) + for i in range(len(self.rois_fpn))] + self.outputs = { + 'MultiFpnRois': output, + 'RestoreIndex': self.rois_idx_restore + } + + def init_test_case(self): + self.roi_max_level = 5 + self.roi_min_level = 2 + self.canonical_scale = 224 + self.canonical_level = 4 + self.images_shape = [512, 512] + + def boxes_area(self, boxes): + w = (boxes[:, 2] - boxes[:, 0] + 1) + h = (boxes[:, 3] - boxes[:, 1] + 1) + areas = w * h + assert np.all(areas >= 0), 'Negative areas founds' + return areas + + def map_rois_to_fpn_levels(self, rois, lvl_min, lvl_max): + s = np.sqrt(self.boxes_area(rois)) + s0 = self.canonical_scale + lvl0 = self.canonical_level + target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6)) + target_lvls = np.clip(target_lvls, lvl_min, lvl_max) + return target_lvls + + def get_sub_lod(self, sub_lvl): + sub_lod = [] + max_batch_id = sub_lvl[-1] + for i in range(max_batch_id.astype(np.int32) + 1): + sub_lod.append(np.where(sub_lvl == i)[0].size) + return sub_lod + + def add_multilevel_roi(self, rois, target_lvls, lvl_min, lvl_max): + rois_idx_order = np.empty((0, )) + rois_fpn = [] + for lvl in range(lvl_min, lvl_max + 1): + idx_lvl = np.where(target_lvls == lvl)[0] + if len(idx_lvl) == 0: + rois_fpn.append((np.empty(shape=(0, 4)), [[0, 0]])) + continue + sub_lod = self.get_sub_lod(rois[idx_lvl, 0]) + rois_fpn.append((rois[idx_lvl, 1:], [sub_lod])) + rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) + rois_idx_restore = np.argsort(rois_idx_order).astype( + np.int32, copy=False) + return rois_fpn, rois_idx_restore + + def calc_rois_distribute(self): + lvl_min = self.roi_min_level + lvl_max = self.roi_max_level + target_lvls = self.map_rois_to_fpn_levels(self.rois[:, 1:5], lvl_min, + lvl_max) + rois_fpn, rois_idx_restore = self.add_multilevel_roi( + self.rois, target_lvls, lvl_min, lvl_max) + return rois_fpn, rois_idx_restore + + def make_rois(self): + self.rois_lod = [[100, 200]] + rois = [] + lod = self.rois_lod[0] + bno = 0 + for roi_num in lod: + for i in range(roi_num): + xywh = np.random.rand(4) + xy1 = xywh[0:2] * 20 + wh = xywh[2:4] * (self.images_shape - xy1) + xy2 = xy1 + wh + roi = [bno, xy1[0], xy1[1], xy2[0], xy2[1]] + rois.append(roi) + bno += 1 + self.rois = np.array(rois).astype("float32") + + def setUp(self): + self.op_type = "distribute_fpn_proposals" + self.set_data() + + def test_check_output(self): + self.check_output()