From 36fac289ae3e96ea1391ccf84251c593429215c0 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 29 Oct 2020 20:07:27 +0800 Subject: [PATCH] add box_coder (#1631) --- ppdet/modeling/bbox.py | 2 +- ppdet/modeling/ops.py | 153 +++++++++++++++++++++++++++++- ppdet/modeling/tests/test_base.py | 2 + ppdet/modeling/tests/test_ops.py | 84 ++++++++++++---- 4 files changed, 223 insertions(+), 18 deletions(-) diff --git a/ppdet/modeling/bbox.py b/ppdet/modeling/bbox.py index 31e03a92e..cfae09e61 100644 --- a/ppdet/modeling/bbox.py +++ b/ppdet/modeling/bbox.py @@ -253,7 +253,7 @@ class Proposal(object): bbox_delta_s = fluid.layers.slice( bbox_delta_r, axes=[1], starts=[1], ends=[2]) - refined_bbox = fluid.layers.box_coder( + refined_bbox = ops.box_coder( prior_box=rois, prior_box_var=self.proposal_target_generator.bbox_reg_weights[ stage], diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index a55f8efb2..613b3235a 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -31,7 +31,7 @@ __all__ = [ 'anchor_generator', #'generate_proposals', 'iou_similarity', - #'box_coder', + 'box_coder', 'yolo_box', 'multiclass_nms', 'distribute_fpn_proposals', @@ -1217,3 +1217,154 @@ def matrix_nms(bboxes, if return_rois_num: return output, rois_num return output + + +def box_coder(prior_box, + prior_box_var, + target_box, + code_type="encode_center_size", + box_normalized=True, + axis=0, + name=None): + """ + **Box Coder Layer** + Encode/Decode the target bounding box with the priorbox information. + + The Encoding schema described below: + .. math:: + ox = (tx - px) / pw / pxv + oy = (ty - py) / ph / pyv + ow = \log(\abs(tw / pw)) / pwv + oh = \log(\abs(th / ph)) / phv + The Decoding schema described below: + + .. 
math:: + + ox = (pw * pxv * tx + px) - tw / 2 + oy = (ph * pyv * ty + py) - th / 2 + ow = \exp(pwv * tw) * pw + tw / 2 + oh = \exp(phv * th) * ph + th / 2 + where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, + width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote + the priorbox's (anchor) center coordinates, width and height. `pxv`, + `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`, + `ow`, `oh` denote the encoded/decoded coordinates, width and height. + During Box Decoding, two modes for broadcast are supported. Say target + box has shape [N, M, 4], and the shape of prior box can be [N, 4] or + [M, 4]. Then prior box will broadcast to target box along the + assigned axis. + + Args: + prior_box(Tensor): Box list prior_box is a 2-D Tensor with shape + [M, 4] that holds M boxes; its data type is float32 or float64. Each box + is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the + left top coordinate of the anchor box, if the input is image feature + map, they are close to the origin of the coordinate system. + [xmax, ymax] is the right bottom coordinate of the anchor box. + prior_box_var(List|Tensor|None): prior_box_var supports three types + of input. One is a Tensor with shape [M, 4] which holds M groups; its + data type is float32 or float64. The second is a list consisting of + 4 elements shared by all boxes; its data type is float32 or float64. + The third is None (variance not involved in calculation); note that this implementation raises TypeError for None. + target_box(Tensor): This input can be a 2-D LoDTensor with shape + [N, 4] when code_type is 'encode_center_size'. This input also can + be a 3-D Tensor with shape [N, M, 4] when code_type is + 'decode_center_size'. Each box is represented as + [xmin, ymin, xmax, ymax]. The data type is float32 or float64. + code_type(str): The code type used with the target box. It can be + `encode_center_size` or `decode_center_size`. `encode_center_size` + by default.
+ box_normalized(bool): Whether to treat the priorbox as a normalized box. + Set true by default. + axis(int): Which axis in PriorBox to broadcast for box decode, + for example, if axis is 0 and TargetBox has shape [N, M, 4] and + PriorBox has shape [M, 4], then PriorBox will broadcast to [N, M, 4] + for decoding. It is only valid when code type is + `decode_center_size`. Set 0 by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually there is no need to set it; it is + None by default. + + Returns: + Tensor: + output_box(Tensor): When code_type is 'encode_center_size', the + output tensor of box_coder_op with shape [N, M, 4] representing the + result of N target boxes encoded with M Prior boxes and variances. + When code_type is 'decode_center_size', N represents the batch size + and M represents the number of decoded boxes. + + Examples: + + .. code-block:: python + + import paddle + from ppdet.modeling import ops + paddle.enable_static() + # For encode + prior_box_encode = paddle.static.data(name='prior_box_encode', + shape=[512, 4], + dtype='float32') + target_box_encode = paddle.static.data(name='target_box_encode', + shape=[81, 4], + dtype='float32') + output_encode = ops.box_coder(prior_box=prior_box_encode, + prior_box_var=[0.1,0.1,0.2,0.2], + target_box=target_box_encode, + code_type="encode_center_size") + # For decode + prior_box_decode = paddle.static.data(name='prior_box_decode', + shape=[512, 4], + dtype='float32') + target_box_decode = paddle.static.data(name='target_box_decode', + shape=[512, 81, 4], + dtype='float32') + output_decode = ops.box_coder(prior_box=prior_box_decode, + prior_box_var=[0.1,0.1,0.2,0.2], + target_box=target_box_decode, + code_type="decode_center_size", + box_normalized=False, + axis=1) + """ + check_variable_and_dtype(prior_box, 'prior_box', ['float32', 'float64'], + 'box_coder') + check_variable_and_dtype(target_box, 'target_box', ['float32', 'float64'], + 'box_coder') + + if
in_dygraph_mode(): + if isinstance(prior_box_var, Variable): + output_box = core.ops.box_coder( + prior_box, prior_box_var, target_box, "code_type", code_type, + "box_normalized", box_normalized, "axis", axis) + + elif isinstance(prior_box_var, list): + output_box = core.ops.box_coder( + prior_box, target_box, "code_type", code_type, "box_normalized", + box_normalized, "axis", axis, "variance", prior_box_var) + else: + raise TypeError( + "Input variance of box_coder must be Variable or list") + return output_box + + helper = LayerHelper("box_coder", **locals()) + + output_box = helper.create_variable_for_type_inference( + dtype=prior_box.dtype) + + inputs = {"PriorBox": prior_box, "TargetBox": target_box} + attrs = { + "code_type": code_type, + "box_normalized": box_normalized, + "axis": axis + } + if isinstance(prior_box_var, Variable): + inputs['PriorBoxVar'] = prior_box_var + elif isinstance(prior_box_var, list): + attrs['variance'] = prior_box_var + else: + raise TypeError("Input variance of box_coder must be Variable or list") + helper.append_op( + type="box_coder", + inputs=inputs, + attrs=attrs, + outputs={"OutputBox": output_box}) + return output_box diff --git a/ppdet/modeling/tests/test_base.py b/ppdet/modeling/tests/test_base.py index 22191502d..cfd03fe0f 100644 --- a/ppdet/modeling/tests/test_base.py +++ b/ppdet/modeling/tests/test_base.py @@ -44,6 +44,7 @@ class LayerTest(unittest.TestCase): @contextlib.contextmanager def static_graph(self): + paddle.enable_static() scope = fluid.core.Scope() program = Program() with fluid.scope_guard(scope): @@ -66,6 +67,7 @@ class LayerTest(unittest.TestCase): @contextlib.contextmanager def dynamic_graph(self, force_to_use_cpu=False): + paddle.disable_static() with fluid.dygraph.guard( self._get_place(force_to_use_cpu=force_to_use_cpu)): paddle.seed(self.seed) diff --git a/ppdet/modeling/tests/test_ops.py b/ppdet/modeling/tests/test_ops.py index d2097df27..e6295f69b 100644 --- a/ppdet/modeling/tests/test_ops.py +++ 
b/ppdet/modeling/tests/test_ops.py @@ -64,7 +64,6 @@ class TestCollectFpnProposals(LayerTest): multi_scores_np.append(scores_np) rois_num_per_level_np.append(rois_num) - paddle.enable_static() with self.static_graph(): multi_bboxes = [] multi_scores = [] @@ -104,7 +103,6 @@ class TestCollectFpnProposals(LayerTest): fpn_rois_stat = np.array(fpn_rois_stat) rois_num_stat = np.array(rois_num_stat) - paddle.disable_static() with self.dynamic_graph(): multi_bboxes_dy = [] multi_scores_dy = [] @@ -148,9 +146,7 @@ class TestCollectFpnProposals(LayerTest): multi_scores.append(scores) return multi_bboxes, multi_scores - paddle.enable_static() - program = Program() - with program_guard(program): + with self.static_graph(): bbox1 = paddle.static.data( name='rois', shape=[5, 10, 4], dtype='float32', lod_level=1) score1 = paddle.static.data( @@ -223,8 +219,7 @@ class TestDistributeFpnProposals(LayerTest): self.assertTrue(np.array_equal(res_stat, res_dy)) def test_distribute_fpn_proposals_error(self): - program = Program() - with program_guard(program): + with self.static_graph(): fpn_rois = paddle.static.data( name='data_error', shape=[10, 4], dtype='int32', lod_level=1) self.assertRaises( @@ -282,8 +277,7 @@ class TestROIAlign(LayerTest): self.assertTrue(np.array_equal(output_np, output_dy_np)) def test_roi_align_error(self): - program = Program() - with program_guard(program): + with self.static_graph(): inputs = paddle.static.data( name='inputs', shape=[2, 12, 20, 20], dtype='float32') rois = paddle.static.data( @@ -341,8 +335,7 @@ class TestROIPool(LayerTest): self.assertTrue(np.array_equal(output_np, output_dy_np)) def test_roi_pool_error(self): - program = Program() - with program_guard(program): + with self.static_graph(): inputs = paddle.static.data( name='inputs', shape=[2, 12, 20, 20], dtype='float32') rois = paddle.static.data( @@ -383,7 +376,7 @@ class TestIoUSimilarity(LayerTest): self.assertTrue(np.array_equal(iou_np, iou_dy_np)) -class TestYOLOBox(LayerTest): 
+class TestYoloBox(LayerTest): def test_yolo_box(self): # x shape [N C H W], C=K * (5 + class_num), class_num=10, K=2 @@ -438,9 +431,7 @@ class TestYOLOBox(LayerTest): self.assertTrue(np.array_equal(scores_np, scores_dy_np)) def test_yolo_box_error(self): - paddle.enable_static() - program = Program() - with program_guard(program): + with self.static_graph(): # x shape [N C H W], C=K * (5 + class_num), class_num=10, K=2 x = paddle.static.data( name='x', shape=[1, 30, 7, 7], dtype='float32') @@ -521,7 +512,6 @@ class TestAnchorGenerator(LayerTest): def test_anchor_generator(self): b, c, h, w = 2, 48, 16, 16 input_np = np.random.rand(2, 48, 16, 16).astype('float32') - paddle.enable_static() with self.static_graph(): input = paddle.static.data( name='input', shape=[b, c, h, w], dtype='float32') @@ -712,5 +702,67 @@ class TestMatrixNMS(LayerTest): return_index=True) +class TestBoxCoder(LayerTest): + def test_box_coder(self): + + prior_box_np = np.random.random((81, 4)).astype('float32') + prior_box_var_np = np.random.random((81, 4)).astype('float32') + target_box_np = np.random.random((20, 81, 4)).astype('float32') + + # static + with self.static_graph(): + prior_box = paddle.static.data( + name='prior_box', shape=[81, 4], dtype='float32') + prior_box_var = paddle.static.data( + name='prior_box_var', shape=[81, 4], dtype='float32') + target_box = paddle.static.data( + name='target_box', shape=[20, 81, 4], dtype='float32') + + boxes = ops.box_coder( + prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=target_box, + code_type="decode_center_size", + box_normalized=False) + + boxes_np, = self.get_static_graph_result( + feed={ + 'prior_box': prior_box_np, + 'prior_box_var': prior_box_var_np, + 'target_box': target_box_np, + }, + fetch_list=[boxes], + with_lod=False) + + # dygraph + with self.dynamic_graph(): + prior_box_dy = base.to_variable(prior_box_np) + prior_box_var_dy = base.to_variable(prior_box_var_np) + target_box_dy = base.to_variable(target_box_np) 
+ + boxes_dy = ops.box_coder( + prior_box=prior_box_dy, + prior_box_var=prior_box_var_dy, + target_box=target_box_dy, + code_type="decode_center_size", + box_normalized=False) + + boxes_dy_np = boxes_dy.numpy() + + self.assertTrue(np.array_equal(boxes_np, boxes_dy_np)) + + def test_box_coder_error(self): + with self.static_graph(): + prior_box = paddle.static.data( + name='prior_box', shape=[81, 4], dtype='int32') + prior_box_var = paddle.static.data( + name='prior_box_var', shape=[81, 4], dtype='float32') + target_box = paddle.static.data( + name='target_box', shape=[20, 81, 4], dtype='float32') + + self.assertRaises(TypeError, ops.box_coder, prior_box, + prior_box_var, target_box) + + if __name__ == '__main__': unittest.main() -- GitLab