diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 274919197827beea00cbeab7a6fcdfb73bd45afd..9a0af76269a7b778c6b159490f3514de692f170b 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -42,14 +42,11 @@ __all__ = [ 'prior_box', 'density_prior_box', 'multi_box_head', - 'bipartite_match', - 'detection_output', 'anchor_generator', 'roi_perspective_transform', 'generate_proposal_labels', 'generate_proposals', 'generate_mask_labels', - 'iou_similarity', 'box_coder', 'polygon_box_transform', 'box_clip', @@ -63,205 +60,6 @@ __all__ = [ ] -def detection_output( - loc, - scores, - prior_box, - prior_box_var, - background_label=0, - nms_threshold=0.3, - nms_top_k=400, - keep_top_k=200, - score_threshold=0.01, - nms_eta=1.0, - return_index=False, -): - """ - - Given the regression locations, classification confidences and prior boxes, - calculate the detection outputs by performing following steps: - - 1. Decode input bounding box predictions according to the prior boxes and - regression locations. - 2. Get the final detection results by applying multi-class non maximum - suppression (NMS). - - Please note, this operation doesn't clip the final output bounding boxes - to the image window. - - Args: - loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the - predicted locations of M bounding bboxes. Data type should be - float32 or float64. N is the batch size, - and each bounding box has four coordinate values and the layout - is [xmin, ymin, xmax, ymax]. - scores(Variable): A 3-D Tensor with shape [N, M, C] represents the - predicted confidence predictions. Data type should be float32 - or float64. N is the batch size, C is the - class number, M is number of bounding boxes. - prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, - each box is represented as [xmin, ymin, xmax, ymax]. Data type - should be float32 or float64. - prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group - of variance. Data type should be float32 or float64. - background_label(int): The index of background label, - the background label will be ignored. If set to -1, then all - categories will be considered. Default: 0. - nms_threshold(float): The threshold to be used in NMS. Default: 0.3. - nms_top_k(int): Maximum number of detections to be kept according - to the confidences after filtering detections based on - score_threshold and before NMS. Default: 400. - keep_top_k(int): Number of total bboxes to be kept per image after - NMS step. -1 means keeping all bboxes after NMS step. Default: 200. - score_threshold(float): Threshold to filter out bounding boxes with - low confidence score. If not provided, consider all boxes. - Default: 0.01. - nms_eta(float): The parameter for adaptive NMS. It works only when the - value is less than 1.0. Default: 1.0. - return_index(bool): Whether return selected index. Default: False - - Returns: - - A tuple with two Variables: (Out, Index) if return_index is True, - otherwise, a tuple with one Variable(Out) is returned. - - Out (Variable): The detection outputs is a LoDTensor with shape [No, 6]. - Data type is the same as input (loc). Each row has six values: - [label, confidence, xmin, ymin, xmax, ymax]. `No` is - the total number of detections in this mini-batch. For each instance, - the offsets in first dimension are called LoD, the offset number is - N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]` - detected results, if it is 0, the i-th image has no detected results. - - Index (Variable): Only return when return_index is True. A 2-D LoDTensor - with shape [No, 1] represents the selected index which type is Integer. - The index is the absolute value cross batches. No is the same number - as Out. If the index is used to gather other attribute such as age, - one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where - N is the batch size and M is the number of boxes. - - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle - - paddle.enable_static() - - pb = fluid.data(name='prior_box', shape=[10, 4], dtype='float32') - pbv = fluid.data(name='prior_box_var', shape=[10, 4], dtype='float32') - loc = fluid.data(name='target_box', shape=[2, 21, 4], dtype='float32') - scores = fluid.data(name='scores', shape=[2, 21, 10], dtype='float32') - nmsed_outs, index = fluid.layers.detection_output(scores=scores, - loc=loc, - prior_box=pb, - prior_box_var=pbv, - return_index=True) - """ - helper = LayerHelper("detection_output", **locals()) - decoded_box = box_coder( - prior_box=prior_box, - prior_box_var=prior_box_var, - target_box=loc, - code_type='decode_center_size', - ) - scores = paddle.nn.functional.softmax(scores) - scores = paddle.transpose(scores, perm=[0, 2, 1]) - scores.stop_gradient = True - nmsed_outs = helper.create_variable_for_type_inference( - dtype=decoded_box.dtype - ) - if return_index: - index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'Scores': scores, 'BBoxes': decoded_box}, - outputs={'Out': nmsed_outs, 'Index': index}, - attrs={ - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - }, - ) - index.stop_gradient = True - else: - helper.append_op( - type="multiclass_nms", - inputs={'Scores': scores, 'BBoxes': decoded_box}, - outputs={'Out': nmsed_outs}, - attrs={ - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - }, - ) - nmsed_outs.stop_gradient = True - if return_index: - return nmsed_outs, index - return nmsed_outs - - -@templatedoc() -def iou_similarity(x, y, box_normalized=True, name=None): - """ - :alias_main: paddle.nn.functional.iou_similarity - :alias: paddle.nn.functional.iou_similarity,paddle.nn.functional.loss.iou_similarity - :old_api: paddle.fluid.layers.iou_similarity - - ${comment} - - Args: - x (Variable): ${x_comment}.The data type is float32 or float64. - y (Variable): ${y_comment}.The data type is float32 or float64. - box_normalized(bool): Whether treat the priorbox as a normalized box. - Set true by default. - Returns: - Variable: ${out_comment}.The data type is same with x. - - Examples: - .. code-block:: python - - import numpy as np - import paddle.fluid as fluid - - use_gpu = False - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - x = fluid.data(name='x', shape=[None, 4], dtype='float32') - y = fluid.data(name='y', shape=[None, 4], dtype='float32') - iou = fluid.layers.iou_similarity(x=x, y=y) - - exe.run(fluid.default_startup_program()) - test_program = fluid.default_main_program().clone(for_test=True) - - [out_iou] = exe.run(test_program, - fetch_list=iou, - feed={'x': np.array([[0.5, 0.5, 2.0, 2.0], - [0., 0., 1.0, 1.0]]).astype('float32'), - 'y': np.array([[1.0, 1.0, 2.5, 2.5]]).astype('float32')}) - # out_iou is [[0.2857143], - # [0. ]] with shape: [2, 1] - """ - helper = LayerHelper("iou_similarity", **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - - helper.append_op( - type="iou_similarity", - inputs={"X": x, "Y": y}, - attrs={"box_normalized": box_normalized}, - outputs={"Out": out}, - ) - return out - - @templatedoc() def box_coder( prior_box, @@ -533,97 +331,6 @@ def detection_map( return map_out -def bipartite_match( - dist_matrix, match_type=None, dist_threshold=None, name=None -): - """ - - This operator implements a greedy bipartite matching algorithm, which is - used to obtain the matching with the maximum distance based on the input - distance matrix. For input 2D matrix, the bipartite matching algorithm can - find the matched column for each row (matched means the largest distance), - also can find the matched row for each column. And this operator only - calculate matched indices from column to row. For each instance, - the number of matched indices is the column number of the input distance - matrix. **The OP only supports CPU**. - - There are two outputs, matched indices and distance. - A simple description, this algorithm matched the best (maximum distance) - row entity to the column entity and the matched indices are not duplicated - in each row of ColToRowMatchIndices. If the column entity is not matched - any row entity, set -1 in ColToRowMatchIndices. - - NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor. - If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size. - If Tensor, the height of ColToRowMatchIndices is 1. - - NOTE: This API is a very low level API. It is used by :code:`ssd_loss` - layer. Please consider to use :code:`ssd_loss` instead. - - Args: - dist_matrix(Variable): This input is a 2-D LoDTensor with shape - [K, M]. The data type is float32 or float64. It is pair-wise - distance matrix between the entities represented by each row and - each column. For example, assumed one entity is A with shape [K], - another entity is B with shape [M]. The dist_matrix[i][j] is the - distance between A[i] and B[j]. The bigger the distance is, the - better matching the pairs are. NOTE: This tensor can contain LoD - information to represent a batch of inputs. One instance of this - batch can contain different numbers of entities. - match_type(str, optional): The type of matching method, should be - 'bipartite' or 'per_prediction'. None ('bipartite') by default. - dist_threshold(float32, optional): If `match_type` is 'per_prediction', - this threshold is to determine the extra matching bboxes based - on the maximum distance, 0.5 by default. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Tuple: - - matched_indices(Variable): A 2-D Tensor with shape [N, M]. The data - type is int32. N is the batch size. If match_indices[i][j] is -1, it - means B[j] does not match any entity in i-th instance. - Otherwise, it means B[j] is matched to row - match_indices[i][j] in i-th instance. The row number of - i-th instance is saved in match_indices[i][j]. - - matched_distance(Variable): A 2-D Tensor with shape [N, M]. The data - type is float32. N is batch size. If match_indices[i][j] is -1, - match_distance[i][j] is also -1.0. Otherwise, assumed - match_distance[i][j] = d, and the row offsets of each instance - are called LoD. Then match_distance[i][j] = - dist_matrix[d+LoD[i]][j]. - - Examples: - - >>> import paddle.fluid as fluid - >>> x = fluid.data(name='x', shape=[None, 4], dtype='float32') - >>> y = fluid.data(name='y', shape=[None, 4], dtype='float32') - >>> iou = fluid.layers.iou_similarity(x=x, y=y) - >>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou) - """ - helper = LayerHelper('bipartite_match', **locals()) - match_indices = helper.create_variable_for_type_inference(dtype='int32') - match_distance = helper.create_variable_for_type_inference( - dtype=dist_matrix.dtype - ) - helper.append_op( - type='bipartite_match', - inputs={'DistMat': dist_matrix}, - attrs={ - 'match_type': match_type, - 'dist_threshold': dist_threshold, - }, - outputs={ - 'ColToRowMatchIndices': match_indices, - 'ColToRowMatchDist': match_distance, - }, - ) - return match_indices, match_distance - - def prior_box( input, image, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index a2745bbca8e7137ddc9e47c38e40d3183916a284..23bcf526c7e33e319c17f0b268eb8f79b3486d52 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -77,49 +77,6 @@ class LayerTest(unittest.TestCase): class TestDetection(unittest.TestCase): - def test_detection_output(self): - program = Program() - with program_guard(program): - pb = layers.data( - name='prior_box', - shape=[10, 4], - append_batch_size=False, - dtype='float32', - ) - pbv = layers.data( - name='prior_box_var', - shape=[10, 4], - append_batch_size=False, - dtype='float32', - ) - loc = layers.data( - name='target_box', - shape=[2, 10, 4], - append_batch_size=False, - dtype='float32', - ) - scores = layers.data( - name='scores', - shape=[2, 10, 20], - append_batch_size=False, - dtype='float32', - ) - out = layers.detection_output( - scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv - ) - out2, index = layers.detection_output( - scores=scores, - loc=loc, - prior_box=pb, - prior_box_var=pbv, - return_index=True, - ) - self.assertIsNotNone(out) - self.assertIsNotNone(out2) - self.assertIsNotNone(index) - self.assertEqual(out.shape[-1], 6) - print(str(program)) - def test_box_coder_api(self): program = Program() with program_guard(program): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index e166b0adb09140d6351410b128ad0cea293328a8..e39ed15e28c16eb955d1c9586ae80bd782ca85a0 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2414,15 +2414,6 @@ class TestBook(LayerTest): out = paddle.scale(input, scale=scale_var) return out - def make_iou_similarity(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - x = self._get_data(name="x", shape=[4], dtype="float32") - y = self._get_data(name="y", shape=[4], dtype="float32") - out = layers.iou_similarity(x, y, name='iou_similarity') - return out - def make_bilinear_tensor_product_layer(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program()