remove detection_output, iou_similarity and bipartite_match (#48773)

c8497414 · zqw_1997 · GitHub · 83c41459 · c8497414 · c8497414
3 changed files
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -42,14 +42,11 @@ __all__ = [
    'prior_box',
    'density_prior_box',
    'multi_box_head',
-    'bipartite_match',
-    'detection_output',
    'anchor_generator',
    'roi_perspective_transform',
    'generate_proposal_labels',
    'generate_proposals',
    'generate_mask_labels',
-    'iou_similarity',
    'box_coder',
    'polygon_box_transform',
    'box_clip',
@@ -63,205 +60,6 @@ __all__ = [
 ]


-def detection_output(
-    loc,
-    scores,
-    prior_box,
-    prior_box_var,
-    background_label=0,
-    nms_threshold=0.3,
-    nms_top_k=400,
-    keep_top_k=200,
-    score_threshold=0.01,
-    nms_eta=1.0,
-    return_index=False,
-):
-    """
-
-    Given the regression locations, classification confidences and prior boxes,
-    calculate the detection outputs by performing following steps:
-
-    1. Decode input bounding box predictions according to the prior boxes and
-       regression locations.
-    2. Get the final detection results by applying multi-class non maximum
-       suppression (NMS).
-
-    Please note, this operation doesn't clip the final output bounding boxes
-    to the image window.
-
-    Args:
-        loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
-            predicted locations of M bounding bboxes. Data type should be
-            float32 or float64. N is the batch size,
-            and each bounding box has four coordinate values and the layout
-            is [xmin, ymin, xmax, ymax].
-        scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
-            predicted confidence predictions. Data type should be float32
-            or float64. N is the batch size, C is the
-            class number, M is number of bounding boxes.
-        prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
-            each box is represented as [xmin, ymin, xmax, ymax]. Data type
-            should be float32 or float64.
-        prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
-            of variance. Data type should be float32 or float64.
-        background_label(int): The index of background label,
-            the background label will be ignored. If set to -1, then all
-            categories will be considered. Default: 0.
-        nms_threshold(float): The threshold to be used in NMS. Default: 0.3.
-        nms_top_k(int): Maximum number of detections to be kept according
-            to the confidences after filtering detections based on
-            score_threshold and before NMS. Default: 400.
-        keep_top_k(int): Number of total bboxes to be kept per image after
-            NMS step. -1 means keeping all bboxes after NMS step. Default: 200.
-        score_threshold(float): Threshold to filter out bounding boxes with
-            low confidence score. If not provided, consider all boxes.
-            Default: 0.01.
-        nms_eta(float): The parameter for adaptive NMS. It works only when the
-            value is less than 1.0. Default: 1.0.
-        return_index(bool): Whether return selected index. Default: False
-
-    Returns:
-
-        A tuple with two Variables: (Out, Index) if return_index is True,
-        otherwise, a tuple with one Variable(Out) is returned.
-
-        Out (Variable): The detection outputs is a LoDTensor with shape [No, 6].
-        Data type is the same as input (loc). Each row has six values:
-        [label, confidence, xmin, ymin, xmax, ymax]. `No` is
-        the total number of detections in this mini-batch. For each instance,
-        the offsets in first dimension are called LoD, the offset number is
-        N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]`
-        detected results, if it is 0, the i-th image has no detected results.
-
-        Index (Variable): Only return when return_index is True. A 2-D LoDTensor
-        with shape [No, 1] represents the selected index which type is Integer.
-        The index is the absolute value cross batches. No is the same number
-        as Out. If the index is used to gather other attribute such as age,
-        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
-        N is the batch size and M is the number of boxes.
-
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            import paddle
-
-            paddle.enable_static()
-
-            pb = fluid.data(name='prior_box', shape=[10, 4], dtype='float32')
-            pbv = fluid.data(name='prior_box_var', shape=[10, 4], dtype='float32')
-            loc = fluid.data(name='target_box', shape=[2, 21, 4], dtype='float32')
-            scores = fluid.data(name='scores', shape=[2, 21, 10], dtype='float32')
-            nmsed_outs, index = fluid.layers.detection_output(scores=scores,
-                                       loc=loc,
-                                       prior_box=pb,
-                                       prior_box_var=pbv,
-                                       return_index=True)
-    """
-    helper = LayerHelper("detection_output", **locals())
-    decoded_box = box_coder(
-        prior_box=prior_box,
-        prior_box_var=prior_box_var,
-        target_box=loc,
-        code_type='decode_center_size',
-    )
-    scores = paddle.nn.functional.softmax(scores)
-    scores = paddle.transpose(scores, perm=[0, 2, 1])
-    scores.stop_gradient = True
-    nmsed_outs = helper.create_variable_for_type_inference(
-        dtype=decoded_box.dtype
-    )
-    if return_index:
-        index = helper.create_variable_for_type_inference(dtype='int')
-        helper.append_op(
-            type="multiclass_nms2",
-            inputs={'Scores': scores, 'BBoxes': decoded_box},
-            outputs={'Out': nmsed_outs, 'Index': index},
-            attrs={
-                'background_label': 0,
-                'nms_threshold': nms_threshold,
-                'nms_top_k': nms_top_k,
-                'keep_top_k': keep_top_k,
-                'score_threshold': score_threshold,
-                'nms_eta': 1.0,
-            },
-        )
-        index.stop_gradient = True
-    else:
-        helper.append_op(
-            type="multiclass_nms",
-            inputs={'Scores': scores, 'BBoxes': decoded_box},
-            outputs={'Out': nmsed_outs},
-            attrs={
-                'background_label': 0,
-                'nms_threshold': nms_threshold,
-                'nms_top_k': nms_top_k,
-                'keep_top_k': keep_top_k,
-                'score_threshold': score_threshold,
-                'nms_eta': 1.0,
-            },
-        )
-    nmsed_outs.stop_gradient = True
-    if return_index:
-        return nmsed_outs, index
-    return nmsed_outs
-
-
-@templatedoc()
-def iou_similarity(x, y, box_normalized=True, name=None):
-    """
-        :alias_main: paddle.nn.functional.iou_similarity
-        :alias: paddle.nn.functional.iou_similarity,paddle.nn.functional.loss.iou_similarity
-        :old_api: paddle.fluid.layers.iou_similarity
-
-    ${comment}
-
-    Args:
-        x (Variable): ${x_comment}.The data type is float32 or float64.
-        y (Variable): ${y_comment}.The data type is float32 or float64.
-        box_normalized(bool): Whether treat the priorbox as a normalized box.
-            Set true by default.
-    Returns:
-        Variable: ${out_comment}.The data type is same with x.
-
-    Examples:
-        .. code-block:: python
-
-            import numpy as np
-            import paddle.fluid as fluid
-
-            use_gpu = False
-            place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
-            exe = fluid.Executor(place)
-
-            x = fluid.data(name='x', shape=[None, 4], dtype='float32')
-            y = fluid.data(name='y', shape=[None, 4], dtype='float32')
-            iou = fluid.layers.iou_similarity(x=x, y=y)
-
-            exe.run(fluid.default_startup_program())
-            test_program = fluid.default_main_program().clone(for_test=True)
-
-            [out_iou] = exe.run(test_program,
-                    fetch_list=iou,
-                    feed={'x': np.array([[0.5, 0.5, 2.0, 2.0],
-                                         [0., 0., 1.0, 1.0]]).astype('float32'),
-                          'y': np.array([[1.0, 1.0, 2.5, 2.5]]).astype('float32')})
-            # out_iou is [[0.2857143],
-            #             [0.       ]] with shape: [2, 1]
-    """
-    helper = LayerHelper("iou_similarity", **locals())
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-
-    helper.append_op(
-        type="iou_similarity",
-        inputs={"X": x, "Y": y},
-        attrs={"box_normalized": box_normalized},
-        outputs={"Out": out},
-    )
-    return out
-
-
 @templatedoc()
 def box_coder(
    prior_box,
@@ -533,97 +331,6 @@ def detection_map(
    return map_out


-def bipartite_match(
-    dist_matrix, match_type=None, dist_threshold=None, name=None
-):
-    """
-
-    This operator implements a greedy bipartite matching algorithm, which is
-    used to obtain the matching with the maximum distance based on the input
-    distance matrix. For input 2D matrix, the bipartite matching algorithm can
-    find the matched column for each row (matched means the largest distance),
-    also can find the matched row for each column. And this operator only
-    calculate matched indices from column to row. For each instance,
-    the number of matched indices is the column number of the input distance
-    matrix. **The OP only supports CPU**.
-
-    There are two outputs, matched indices and distance.
-    A simple description, this algorithm matched the best (maximum distance)
-    row entity to the column entity and the matched indices are not duplicated
-    in each row of ColToRowMatchIndices. If the column entity is not matched
-    any row entity, set -1 in ColToRowMatchIndices.
-
-    NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor.
-    If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
-    If Tensor, the height of ColToRowMatchIndices is 1.
-
-    NOTE: This API is a very low level API. It is used by :code:`ssd_loss`
-    layer. Please consider to use :code:`ssd_loss` instead.
-
-    Args:
-        dist_matrix(Variable): This input is a 2-D LoDTensor with shape
-            [K, M]. The data type is float32 or float64. It is pair-wise
-            distance matrix between the entities represented by each row and
-            each column. For example, assumed one entity is A with shape [K],
-            another entity is B with shape [M]. The dist_matrix[i][j] is the
-            distance between A[i] and B[j]. The bigger the distance is, the
-            better matching the pairs are. NOTE: This tensor can contain LoD
-            information to represent a batch of inputs. One instance of this
-            batch can contain different numbers of entities.
-        match_type(str, optional): The type of matching method, should be
-           'bipartite' or 'per_prediction'. None ('bipartite') by default.
-        dist_threshold(float32, optional): If `match_type` is 'per_prediction',
-            this threshold is to determine the extra matching bboxes based
-            on the maximum distance, 0.5 by default.
-        name(str, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
-
-    Returns:
-        Tuple:
-
-        matched_indices(Variable): A 2-D Tensor with shape [N, M]. The data
-        type is int32. N is the batch size. If match_indices[i][j] is -1, it
-        means B[j] does not match any entity in i-th instance.
-        Otherwise, it means B[j] is matched to row
-        match_indices[i][j] in i-th instance. The row number of
-        i-th instance is saved in match_indices[i][j].
-
-        matched_distance(Variable): A 2-D Tensor with shape [N, M]. The data
-        type is float32. N is batch size. If match_indices[i][j] is -1,
-        match_distance[i][j] is also -1.0. Otherwise, assumed
-        match_distance[i][j] = d, and the row offsets of each instance
-        are called LoD. Then match_distance[i][j] =
-        dist_matrix[d+LoD[i]][j].
-
-    Examples:
-
-        >>> import paddle.fluid as fluid
-        >>> x = fluid.data(name='x', shape=[None, 4], dtype='float32')
-        >>> y = fluid.data(name='y', shape=[None, 4], dtype='float32')
-        >>> iou = fluid.layers.iou_similarity(x=x, y=y)
-        >>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
-    """
-    helper = LayerHelper('bipartite_match', **locals())
-    match_indices = helper.create_variable_for_type_inference(dtype='int32')
-    match_distance = helper.create_variable_for_type_inference(
-        dtype=dist_matrix.dtype
-    )
-    helper.append_op(
-        type='bipartite_match',
-        inputs={'DistMat': dist_matrix},
-        attrs={
-            'match_type': match_type,
-            'dist_threshold': dist_threshold,
-        },
-        outputs={
-            'ColToRowMatchIndices': match_indices,
-            'ColToRowMatchDist': match_distance,
-        },
-    )
-    return match_indices, match_distance
-
-
 def prior_box(
    input,
    image,

--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -77,49 +77,6 @@ class LayerTest(unittest.TestCase):


 class TestDetection(unittest.TestCase):
-    def test_detection_output(self):
-        program = Program()
-        with program_guard(program):
-            pb = layers.data(
-                name='prior_box',
-                shape=[10, 4],
-                append_batch_size=False,
-                dtype='float32',
-            )
-            pbv = layers.data(
-                name='prior_box_var',
-                shape=[10, 4],
-                append_batch_size=False,
-                dtype='float32',
-            )
-            loc = layers.data(
-                name='target_box',
-                shape=[2, 10, 4],
-                append_batch_size=False,
-                dtype='float32',
-            )
-            scores = layers.data(
-                name='scores',
-                shape=[2, 10, 20],
-                append_batch_size=False,
-                dtype='float32',
-            )
-            out = layers.detection_output(
-                scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv
-            )
-            out2, index = layers.detection_output(
-                scores=scores,
-                loc=loc,
-                prior_box=pb,
-                prior_box_var=pbv,
-                return_index=True,
-            )
-            self.assertIsNotNone(out)
-            self.assertIsNotNone(out2)
-            self.assertIsNotNone(index)
-            self.assertEqual(out.shape[-1], 6)
-        print(str(program))
-
    def test_box_coder_api(self):
        program = Program()
        with program_guard(program):

--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -2414,15 +2414,6 @@ class TestBook(LayerTest):
            out = paddle.scale(input, scale=scale_var)
            return out

-    def make_iou_similarity(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            x = self._get_data(name="x", shape=[4], dtype="float32")
-            y = self._get_data(name="y", shape=[4], dtype="float32")
-            out = layers.iou_similarity(x, y, name='iou_similarity')
-            return out
-
    def make_bilinear_tensor_product_layer(self):
        with program_guard(
            fluid.default_main_program(), fluid.default_startup_program()