diff --git a/ppdet/modeling/heads/roi_extractor.py b/ppdet/modeling/heads/roi_extractor.py
index cab81d5ea74dc2fced1eadaaad2f6da3190dba0c..c7eb4638219f9ba30b61aee59953ee44ba483868 100644
--- a/ppdet/modeling/heads/roi_extractor.py
+++ b/ppdet/modeling/heads/roi_extractor.py
@@ -87,13 +87,23 @@ class RoIAlign(object):
             offset = 2
             k_min = self.start_level + offset
             k_max = self.end_level + offset
-            rois_dist, restore_index, rois_num_dist = paddle.vision.ops.distribute_fpn_proposals(
-                roi,
-                k_min,
-                k_max,
-                self.canconical_level,
-                self.canonical_size,
-                rois_num=rois_num)
+            if hasattr(paddle.vision.ops, "distribute_fpn_proposals"):
+                rois_dist, restore_index, rois_num_dist = paddle.vision.ops.distribute_fpn_proposals(
+                    roi,
+                    k_min,
+                    k_max,
+                    self.canconical_level,
+                    self.canonical_size,
+                    rois_num=rois_num)
+            else:  # op missing from paddle.vision.ops: use ppdet's own wrapper
+                rois_dist, restore_index, rois_num_dist = ops.distribute_fpn_proposals(
+                    roi,
+                    k_min,
+                    k_max,
+                    self.canconical_level,
+                    self.canonical_size,
+                    rois_num=rois_num)
+
             rois_feat_list = []
             for lvl in range(self.start_level, self.end_level + 1):
                 roi_feat = paddle.vision.ops.roi_align(
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index 564261c89d31e9c3c785debba4ac03571757a15e..025b60050237824673b6070a3d910a1895904270 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -17,15 +17,20 @@ import paddle.nn.functional as F
 import paddle.nn as nn
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
-from paddle import _C_ops, _legacy_C_ops
+try:
+    import paddle._legacy_C_ops as C_ops
+except ImportError:  # this Paddle build has no _legacy_C_ops module
+    import paddle._C_ops as C_ops
 
 from paddle import in_dynamic_mode
 from paddle.common_ops_import import Variable, LayerHelper, check_variable_and_dtype, check_type, check_dtype
 
 __all__ = [
     'prior_box',
+    'generate_proposals',
     'box_coder',
     'multiclass_nms',
+    'distribute_fpn_proposals',
     'matrix_nms',
     'batch_norm',
     'mish',
@@ -113,6 +118,136 @@ def batch_norm(ch,
     return norm_layer
 
 
+@paddle.jit.not_to_static
+def distribute_fpn_proposals(fpn_rois,
+                             min_level,
+                             max_level,
+                             refer_level,
+                             refer_scale,
+                             pixel_offset=False,
+                             rois_num=None,
+                             name=None):
+    r"""
+
+    **This op only takes LoDTensor as input.** In Feature Pyramid Networks
+    (FPN) models, it is needed to distribute all proposals into different FPN
+    level, with respect to scale of the proposals, the referring scale and the
+    referring level. Besides, to restore the order of proposals, we return an
+    array which indicates the original index of rois in current proposals.
+    To compute FPN level for each roi, the formula is given as follows:
+
+    .. math::
+
+        roi\_scale &= \sqrt{BBoxArea(fpn\_roi)}
+
+        level = floor(&\log(\\frac{roi\_scale}{refer\_scale}) + refer\_level)
+
+    where BBoxArea is a function to compute the area of each roi.
+
+    Args:
+
+        fpn_rois(Variable): 2-D Tensor with shape [N, 4] and data type is
+            float32 or float64. The input fpn_rois.
+        min_level(int32): The lowest level of FPN layer where the proposals come
+            from.
+        max_level(int32): The highest level of FPN layer where the proposals
+            come from.
+        refer_level(int32): The referring level of FPN layer with specified scale.
+        refer_scale(int32): The referring scale of FPN layer with specified level.
+        rois_num(Tensor): 1-D Tensor contains the number of RoIs in each image.
+            The shape is [B] and data type is int32. B is the number of images.
+            If it is not None then return a list of 1-D Tensor. Each element
+            is the output RoIs' number of each image on the corresponding level
+            and the shape is [B]. None by default.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+
+    Returns:
+        Tuple:
+
+        multi_rois(List) : A list of 2-D LoDTensor with shape [M, 4]
+        and data type of float32 and float64. The length is
+        max_level-min_level+1. The proposals in each FPN level.
+
+        restore_ind(Variable): A 2-D Tensor with shape [N, 1], N is
+        the number of total rois. The data type is int32. It is
+        used to restore the order of fpn_rois.
+
+        rois_num_per_level(List): A list of 1-D Tensor and each Tensor is
+        the RoIs' number in each image on the corresponding level. The shape
+        is [B] and data type of int32. B is the number of images
+
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from ppdet.modeling import ops
+            paddle.enable_static()
+            fpn_rois = paddle.static.data(
+                name='data', shape=[None, 4], dtype='float32', lod_level=1)
+            multi_rois, restore_ind, _ = ops.distribute_fpn_proposals(
+                fpn_rois=fpn_rois,
+                min_level=2,
+                max_level=5,
+                refer_level=4,
+                refer_scale=224)
+    """
+    num_lvl = max_level - min_level + 1
+
+    if in_dynamic_mode():
+        assert rois_num is not None, "rois_num should not be None in dygraph mode."
+        attrs = ('min_level', min_level, 'max_level', max_level, 'refer_level',
+                 refer_level, 'refer_scale', refer_scale, 'pixel_offset',
+                 pixel_offset)
+        multi_rois, restore_ind, rois_num_per_level = C_ops.distribute_fpn_proposals(
+            fpn_rois, rois_num, num_lvl, num_lvl, *attrs)
+
+        return multi_rois, restore_ind, rois_num_per_level
+
+    else:
+        check_variable_and_dtype(fpn_rois, 'fpn_rois', ['float32', 'float64'],
+                                 'distribute_fpn_proposals')
+        helper = LayerHelper('distribute_fpn_proposals', **locals())
+        dtype = helper.input_dtype('fpn_rois')
+        multi_rois = [
+            helper.create_variable_for_type_inference(dtype)
+            for i in range(num_lvl)
+        ]
+
+        restore_ind = helper.create_variable_for_type_inference(dtype='int32')
+
+        inputs = {'FpnRois': fpn_rois}
+        outputs = {
+            'MultiFpnRois': multi_rois,
+            'RestoreIndex': restore_ind,
+        }
+
+        if rois_num is not None:
+            inputs['RoisNum'] = rois_num
+            rois_num_per_level = [
+                helper.create_variable_for_type_inference(dtype='int32')
+                for i in range(num_lvl)
+            ]
+            outputs['MultiLevelRoIsNum'] = rois_num_per_level
+        else:
+            rois_num_per_level = None
+
+        helper.append_op(
+            type='distribute_fpn_proposals',
+            inputs=inputs,
+            outputs=outputs,
+            attrs={
+                'min_level': min_level,
+                'max_level': max_level,
+                'refer_level': refer_level,
+                'refer_scale': refer_scale,
+                'pixel_offset': pixel_offset
+            })
+        return multi_rois, restore_ind, rois_num_per_level
+
+
 @paddle.jit.not_to_static
 def prior_box(input,
               image,
@@ -222,7 +357,7 @@ def prior_box(input,
                  'min_max_aspect_ratios_order', min_max_aspect_ratios_order)
         if cur_max_sizes is not None:
             attrs += ('max_sizes', cur_max_sizes)
-        box, var = _legacy_C_ops.prior_box(input, image, *attrs)
+        box, var = C_ops.prior_box(input, image, *attrs)
         return box, var
     else:
         attrs = {
@@ -365,8 +500,8 @@ def multiclass_nms(bboxes,
                  score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
                  nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
                  'normalized', normalized)
-        output, index, nms_rois_num = _legacy_C_ops.multiclass_nms3(
-            bboxes, scores, rois_num, *attrs)
+        output, index, nms_rois_num = C_ops.multiclass_nms3(bboxes, scores,
+                                                            rois_num, *attrs)
         if not return_index:
             index = None
         return output, nms_rois_num, index
@@ -507,7 +642,7 @@ def matrix_nms(bboxes,
                  nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian',
                  use_gaussian, 'keep_top_k', keep_top_k, 'normalized',
                  normalized)
-        out, index, rois_num = _legacy_C_ops.matrix_nms(bboxes, scores, *attrs)
+        out, index, rois_num = C_ops.matrix_nms(bboxes, scores, *attrs)
         if not return_index:
             index = None
         if not return_rois_num:
@@ -660,12 +795,12 @@ def box_coder(prior_box,
 
     if in_dynamic_mode():
         if isinstance(prior_box_var, Variable):
-            output_box = _legacy_C_ops.box_coder(
+            output_box = C_ops.box_coder(
                 prior_box, prior_box_var, target_box, "code_type", code_type,
                 "box_normalized", box_normalized, "axis", axis)
 
         elif isinstance(prior_box_var, list):
-            output_box = _legacy_C_ops.box_coder(
+            output_box = C_ops.box_coder(
                 prior_box, None, target_box, "code_type", code_type,
                 "box_normalized", box_normalized, "axis", axis, "variance",
                 prior_box_var)
@@ -700,6 +835,154 @@ def box_coder(prior_box,
     return output_box
 
 
+@paddle.jit.not_to_static
+def generate_proposals(scores,
+                       bbox_deltas,
+                       im_shape,
+                       anchors,
+                       variances,
+                       pre_nms_top_n=6000,
+                       post_nms_top_n=1000,
+                       nms_thresh=0.5,
+                       min_size=0.1,
+                       eta=1.0,
+                       pixel_offset=False,
+                       return_rois_num=False,
+                       name=None):
+    """
+    **Generate proposal Faster-RCNN**
+    This operation proposes RoIs according to each box with their
+    probability to be a foreground object and
+    the box can be calculated by anchors. Bbox_deltas and scores
+    to be an object are the output of RPN. Final proposals
+    could be used to train detection net.
+    For generating proposals, this operation performs following steps:
+    1. Transposes and resizes scores and bbox_deltas in size of
+       (H*W*A, 1) and (H*W*A, 4)
+    2. Calculate box locations as proposals candidates.
+    3. Clip boxes to image
+    4. Remove predicted boxes with small area.
+    5. Apply NMS to get final proposals as output.
+    Args:
+        scores(Tensor): A 4-D Tensor with shape [N, A, H, W] represents
+            the probability for each box to be an object.
+            N is batch size, A is number of anchors, H and W are height and
+            width of the feature map. The data type must be float32.
+        bbox_deltas(Tensor): A 4-D Tensor with shape [N, 4*A, H, W]
+            represents the difference between predicted box location and
+            anchor location. The data type must be float32.
+        im_shape(Tensor): A 2-D Tensor with shape [N, 2] represents H, W, the
+            origin image size or input size. The data type can be float32 or
+            float64.
+        anchors(Tensor):   A 4-D Tensor represents the anchors with a layout
+            of [H, W, A, 4]. H and W are height and width of the feature map,
+            num_anchors is the box count of each position. Each anchor is
+            in (xmin, ymin, xmax, ymax) format and unnormalized. The data type must be float32.
+        variances(Tensor): A 4-D Tensor. The expanded variances of anchors with a layout of
+            [H, W, num_priors, 4]. Each variance is in
+            (xcenter, ycenter, w, h) format. The data type must be float32.
+        pre_nms_top_n(float): Number of total bboxes to be kept per
+            image before NMS. The data type must be float32. `6000` by default.
+        post_nms_top_n(float): Number of total bboxes to be kept per
+            image after NMS. The data type must be float32. `1000` by default.
+        nms_thresh(float): Threshold in NMS. The data type must be float32. `0.5` by default.
+        min_size(float): Remove predicted boxes with either height or
+            width < min_size. The data type must be float32. `0.1` by default.
+        eta(float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
+            `adaptive_threshold = adaptive_threshold * eta` in each iteration.
+        return_rois_num(bool): When setting True, it will return a 1D Tensor with shape [N, ] that includes Rois's
+            num of each image in one batch. The N is the image's num. For example, the tensor has values [4,5] that represents
+            the first image has 4 Rois, the second image has 5 Rois. It only used in rcnn model.
+            'False' by default.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+
+    Returns:
+        tuple:
+        A tuple with format ``(rpn_rois, rpn_roi_probs, rpn_rois_num)``; ``rpn_rois_num`` is None unless ``return_rois_num`` is True.
+        - **rpn_rois**: The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
+        - **rpn_roi_probs**: The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from ppdet.modeling import ops
+            paddle.enable_static()
+            scores = paddle.static.data(name='scores', shape=[None, 4, 5, 5], dtype='float32')
+            bbox_deltas = paddle.static.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32')
+            im_shape = paddle.static.data(name='im_shape', shape=[None, 2], dtype='float32')
+            anchors = paddle.static.data(name='anchors', shape=[None, 5, 4, 4], dtype='float32')
+            variances = paddle.static.data(name='variances', shape=[None, 5, 10, 4], dtype='float32')
+            rois, roi_probs, _ = ops.generate_proposals(scores, bbox_deltas,
+                         im_shape, anchors, variances)
+    """
+    if in_dynamic_mode():
+        assert return_rois_num, "return_rois_num should be True in dygraph mode."
+        attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
+                 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
+                 'pixel_offset', pixel_offset)
+        rpn_rois, rpn_roi_probs, rpn_rois_num = C_ops.generate_proposals_v2(
+            scores, bbox_deltas, im_shape, anchors, variances, *attrs)
+        if not return_rois_num:
+            rpn_rois_num = None
+        return rpn_rois, rpn_roi_probs, rpn_rois_num
+
+    else:
+        helper = LayerHelper('generate_proposals_v2', **locals())
+
+        check_variable_and_dtype(scores, 'scores', ['float32'],
+                                 'generate_proposals_v2')
+        check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'],
+                                 'generate_proposals_v2')
+        check_variable_and_dtype(im_shape, 'im_shape', ['float32', 'float64'],
+                                 'generate_proposals_v2')
+        check_variable_and_dtype(anchors, 'anchors', ['float32'],
+                                 'generate_proposals_v2')
+        check_variable_and_dtype(variances, 'variances', ['float32'],
+                                 'generate_proposals_v2')
+
+        rpn_rois = helper.create_variable_for_type_inference(
+            dtype=bbox_deltas.dtype)
+        rpn_roi_probs = helper.create_variable_for_type_inference(
+            dtype=scores.dtype)
+        outputs = {
+            'RpnRois': rpn_rois,
+            'RpnRoiProbs': rpn_roi_probs,
+        }
+        if return_rois_num:
+            rpn_rois_num = helper.create_variable_for_type_inference(
+                dtype='int32')
+            rpn_rois_num.stop_gradient = True
+            outputs['RpnRoisNum'] = rpn_rois_num
+
+        helper.append_op(
+            type="generate_proposals_v2",
+            inputs={
+                'Scores': scores,
+                'BboxDeltas': bbox_deltas,
+                'ImShape': im_shape,
+                'Anchors': anchors,
+                'Variances': variances
+            },
+            attrs={
+                'pre_nms_topN': pre_nms_top_n,
+                'post_nms_topN': post_nms_top_n,
+                'nms_thresh': nms_thresh,
+                'min_size': min_size,
+                'eta': eta,
+                'pixel_offset': pixel_offset
+            },
+            outputs=outputs)
+        rpn_rois.stop_gradient = True
+        rpn_roi_probs.stop_gradient = True
+        if not return_rois_num:
+            rpn_rois_num = None
+
+        return rpn_rois, rpn_roi_probs, rpn_rois_num
+
+
 def sigmoid_cross_entropy_with_logits(input,
                                       label,
                                       ignore_index=-100,
diff --git a/ppdet/modeling/proposal_generator/proposal_generator.py b/ppdet/modeling/proposal_generator/proposal_generator.py
index e911909fee98ac9377c4d2b76a61595ce2f4041b..6c722c8cf0872140b77acb2ff9bb1af352cb66e7 100644
--- a/ppdet/modeling/proposal_generator/proposal_generator.py
+++ b/ppdet/modeling/proposal_generator/proposal_generator.py
@@ -62,16 +62,31 @@ class ProposalGenerator(object):
 
         top_n = self.pre_nms_top_n if self.topk_after_collect else self.post_nms_top_n
         variances = paddle.ones_like(anchors)
-        rpn_rois, rpn_rois_prob, rpn_rois_num = paddle.vision.ops.generate_proposals(
-            scores,
-            bbox_deltas,
-            im_shape,
-            anchors,
-            variances,
-            pre_nms_top_n=self.pre_nms_top_n,
-            post_nms_top_n=top_n,
-            nms_thresh=self.nms_thresh,
-            min_size=self.min_size,
-            eta=self.eta,
-            return_rois_num=True)
+        if hasattr(paddle.vision.ops, "generate_proposals"):
+            rpn_rois, rpn_rois_prob, rpn_rois_num = paddle.vision.ops.generate_proposals(
+                scores,
+                bbox_deltas,
+                im_shape,
+                anchors,
+                variances,
+                pre_nms_top_n=self.pre_nms_top_n,
+                post_nms_top_n=top_n,
+                nms_thresh=self.nms_thresh,
+                min_size=self.min_size,
+                eta=self.eta,
+                return_rois_num=True)
+        else:  # op missing from paddle.vision.ops: use ppdet's own wrapper
+            rpn_rois, rpn_rois_prob, rpn_rois_num = ops.generate_proposals(
+                scores,
+                bbox_deltas,
+                im_shape,
+                anchors,
+                variances,
+                pre_nms_top_n=self.pre_nms_top_n,
+                post_nms_top_n=top_n,
+                nms_thresh=self.nms_thresh,
+                min_size=self.min_size,
+                eta=self.eta,
+                return_rois_num=True)
+
         return rpn_rois, rpn_rois_prob, rpn_rois_num, self.post_nms_top_n
diff --git a/ppdet/utils/check.py b/ppdet/utils/check.py
index 6eb1e79b0f9b3db05d4d13a41412c40426d62a2d..52df359db486ebb569663c0ba6536bf66b0dce24 100644
--- a/ppdet/utils/check.py
+++ b/ppdet/utils/check.py
@@ -101,18 +101,9 @@ def check_version(version='2.2'):
         paddle_version.rc
     ]
 
-    # Paddledet develop version is only used on Paddle develop
-    if version_installed == ['0', '0', '0', '0'] and version != 'develop':
-        raise Exception(
-            "PaddlePaddle version {} or higher is required, and develop version is only used for PaddleDetection develop version!".
-            format(version))
-
     if version_installed == ['0', '0', '0', '0']:
         return
 
-    if version == 'develop':
-        raise Exception("PaddlePaddle develop version is required!")
-
     version_split = version.split('.')
 
     length = min(len(version_installed), len(version_split))
diff --git a/tools/eval.py b/tools/eval.py
index c2ba0c7efd549264556e89a85caf2cb8f4dc1976..42a2fcafa19da6e6cd875dbfc84c9a1a4f26a82a 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -184,7 +184,7 @@ def main():
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/eval_mot.py b/tools/eval_mot.py
index 6bf4d8645f95fe498df20be1e946459d407a2080..a9ca517030a31eb80fd239cca42602355db140d1 100644
--- a/tools/eval_mot.py
+++ b/tools/eval_mot.py
@@ -125,7 +125,7 @@ def main():
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/export_model.py b/tools/export_model.py
index 932f1352b3caac70d0168a160c0c87ae7e9d232b..1eaac7a76723540e7613f43c300bbb1fb3f1e76f 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -99,7 +99,7 @@ def main():
     merge_config(FLAGS.opt)
     check_config(cfg)
     check_gpu(cfg.use_gpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/infer.py b/tools/infer.py
index 0be9a01986476089dee6ed9e9e627edc3ae3af57..d9bf3166e808d9d50675d31d0c72470ef229c7bd 100755
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -217,7 +217,7 @@ def main():
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/infer_mot.py b/tools/infer_mot.py
index 0f8f6b7bfd6767b002e9e26a276e57751c5fecdd..ef13bff932b923a3f262954c13f09966e5cd1460 100644
--- a/tools/infer_mot.py
+++ b/tools/infer_mot.py
@@ -137,7 +137,7 @@ def main():
     check_gpu(cfg.use_gpu)
     check_npu(cfg.use_npu)
     check_xpu(cfg.use_xpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/post_quant.py b/tools/post_quant.py
index 7dc871eedc7b881fbe4b6c8dcf745619b8923bcb..fe6f9c79b5c6fbc08e88874cb84c868684bb1a14 100644
--- a/tools/post_quant.py
+++ b/tools/post_quant.py
@@ -87,7 +87,7 @@ def main():
     merge_config(FLAGS.opt)
     check_config(cfg)
     check_gpu(cfg.use_gpu)
-    check_version('develop')
+    check_version()
 
     run(FLAGS, cfg)
 
diff --git a/tools/train.py b/tools/train.py
index f6d5367c182fd2f4e9b2075e1551ed80e546853d..43f883592831ebd2557ea741b6c18f3975688dfe 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -163,7 +163,7 @@ def main():
     check.check_config(cfg)
     check.check_gpu(cfg.use_gpu)
     check.check_npu(cfg.use_npu)
-    check.check_version('develop')
+    check.check_version()
 
     run(FLAGS, cfg)
 