From 1976cc4b6ea8463e73f92e4fcf9c313e918f9e25 Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:19:18 +0800 Subject: [PATCH] [fluid remove]: remove paddle.fluid.layers.target_assign, paddle.fluid.layers.rpn_target_assign, paddle.fluid.layers.retinanet_target_assign and paddle.fluid.layers.ssd_loss (#48669) * remove paddle.fluid.layers.nn.temporal_shift * code check * rm unittest * remove ssd_loss, target_assigns * remove target_assign, retinanet_target_assign, rpn_target_assign and ssd_loss --- python/paddle/fluid/layers/detection.py | 829 ------------------ python/paddle/fluid/tests/test_detection.py | 149 ---- .../fluid/tests/unittests/test_layers.py | 64 -- .../unittests/test_rpn_target_assign_op.py | 422 --------- 4 files changed, 1464 deletions(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 486daac609..3d277705aa 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -43,11 +43,7 @@ __all__ = [ 'density_prior_box', 'multi_box_head', 'bipartite_match', - 'target_assign', 'detection_output', - 'ssd_loss', - 'rpn_target_assign', - 'retinanet_target_assign', 'anchor_generator', 'roi_perspective_transform', 'generate_proposal_labels', @@ -69,460 +65,6 @@ __all__ = [ ] -def retinanet_target_assign( - bbox_pred, - cls_logits, - anchor_box, - anchor_var, - gt_boxes, - gt_labels, - is_crowd, - im_info, - num_classes=1, - positive_overlap=0.5, - negative_overlap=0.4, -): - r""" - **Target Assign Layer for the detector RetinaNet.** - - This OP finds out positive and negative samples from all anchors - for training the detector `RetinaNet `_ , - and assigns target labels for classification along with target locations for - regression to each sample, then takes out the part belonging to positive and - negative samples from category prediction( :attr:`cls_logits`) and location - prediction( :attr:`bbox_pred`) which belong to all anchors. - - The searching principles for positive and negative samples are as followed: - - 1. Anchors are assigned to ground-truth boxes when it has the highest IoU - overlap with a ground-truth box. - - 2. Anchors are assigned to ground-truth boxes when it has an IoU overlap - higher than :attr:`positive_overlap` with any ground-truth box. - - 3. Anchors are assigned to background when its IoU overlap is lower than - :attr:`negative_overlap` for all ground-truth boxes. - - 4. Anchors which do not meet the above conditions do not participate in - the training process. - - Retinanet predicts a :math:`C`-vector for classification and a 4-vector for box - regression for each anchor, hence the target label for each positive(or negative) - sample is a :math:`C`-vector and the target locations for each positive sample - is a 4-vector. As for a positive sample, if the category of its assigned - ground-truth box is class :math:`i`, the corresponding entry in its length - :math:`C` label vector is set to 1 and all other entries is set to 0, its box - regression targets are computed as the offset between itself and its assigned - ground-truth box. As for a negative sample, all entries in its length :math:`C` - label vector are set to 0 and box regression targets are omitted because - negative samples do not participate in the training process of location - regression. - - After the assignment, the part belonging to positive and negative samples is - taken out from category prediction( :attr:`cls_logits` ), and the part - belonging to positive samples is taken out from location - prediction( :attr:`bbox_pred` ). - - Args: - bbox_pred(Variable): A 3-D Tensor with shape :math:`[N, M, 4]` represents - the predicted locations of all anchors. :math:`N` is the batch size( the - number of images in a mini-batch), :math:`M` is the number of all anchors - of one image, and each anchor has 4 coordinate values. The data type of - :attr:`bbox_pred` is float32 or float64. - cls_logits(Variable): A 3-D Tensor with shape :math:`[N, M, C]` represents - the predicted categories of all anchors. :math:`N` is the batch size, - :math:`M` is the number of all anchors of one image, and :math:`C` is - the number of categories (**Notice: excluding background**). The data type - of :attr:`cls_logits` is float32 or float64. - anchor_box(Variable): A 2-D Tensor with shape :math:`[M, 4]` represents - the locations of all anchors. :math:`M` is the number of all anchors of - one image, each anchor is represented as :math:`[xmin, ymin, xmax, ymax]`, - :math:`[xmin, ymin]` is the left top coordinate of the anchor box, - :math:`[xmax, ymax]` is the right bottom coordinate of the anchor box. - The data type of :attr:`anchor_box` is float32 or float64. Please refer - to the OP :ref:`api_fluid_layers_anchor_generator` - for the generation of :attr:`anchor_box`. - anchor_var(Variable): A 2-D Tensor with shape :math:`[M,4]` represents the expanded - factors of anchor locations used in loss function. :math:`M` is number of - all anchors of one image, each anchor possesses a 4-vector expanded factor. - The data type of :attr:`anchor_var` is float32 or float64. Please refer - to the OP :ref:`api_fluid_layers_anchor_generator` - for the generation of :attr:`anchor_var`. - gt_boxes(Variable): A 1-level 2-D LoDTensor with shape :math:`[G, 4]` represents - locations of all ground-truth boxes. :math:`G` is the total number of - all ground-truth boxes in a mini-batch, and each ground-truth box has 4 - coordinate values. The data type of :attr:`gt_boxes` is float32 or - float64. - gt_labels(variable): A 1-level 2-D LoDTensor with shape :math:`[G, 1]` represents - categories of all ground-truth boxes, and the values are in the range of - :math:`[1, C]`. :math:`G` is the total number of all ground-truth boxes - in a mini-batch, and each ground-truth box has one category. The data type - of :attr:`gt_labels` is int32. - is_crowd(Variable): A 1-level 1-D LoDTensor with shape :math:`[G]` which - indicates whether a ground-truth box is a crowd. If the value is 1, the - corresponding box is a crowd, it is ignored during training. :math:`G` is - the total number of all ground-truth boxes in a mini-batch. The data type - of :attr:`is_crowd` is int32. - im_info(Variable): A 2-D Tensor with shape [N, 3] represents the size - information of input images. :math:`N` is the batch size, the size - information of each image is a 3-vector which are the height and width - of the network input along with the factor scaling the origin image to - the network input. The data type of :attr:`im_info` is float32. - num_classes(int32): The number of categories for classification, the default - value is 1. - positive_overlap(float32): Minimum overlap required between an anchor - and ground-truth box for the anchor to be a positive sample, the default - value is 0.5. - negative_overlap(float32): Maximum overlap allowed between an anchor - and ground-truth box for the anchor to be a negative sample, the default - value is 0.4. :attr:`negative_overlap` should be less than or equal to - :attr:`positive_overlap`, if not, the actual value of - :attr:`positive_overlap` is :attr:`negative_overlap`. - - Returns: - A tuple with 6 Variables: - - **predict_scores** (Variable): A 2-D Tensor with shape :math:`[F+B, C]` represents - category prediction belonging to positive and negative samples. :math:`F` - is the number of positive samples in a mini-batch, :math:`B` is the number - of negative samples, and :math:`C` is the number of categories - (**Notice: excluding background**). The data type of :attr:`predict_scores` - is float32 or float64. - - **predict_location** (Variable): A 2-D Tensor with shape :math:`[F, 4]` represents - location prediction belonging to positive samples. :math:`F` is the number - of positive samples. :math:`F` is the number of positive samples, and each - sample has 4 coordinate values. The data type of :attr:`predict_location` - is float32 or float64. - - **target_label** (Variable): A 2-D Tensor with shape :math:`[F+B, 1]` represents - target labels for classification belonging to positive and negative - samples. :math:`F` is the number of positive samples, :math:`B` is the - number of negative, and each sample has one target category. The data type - of :attr:`target_label` is int32. - - **target_bbox** (Variable): A 2-D Tensor with shape :math:`[F, 4]` represents - target locations for box regression belonging to positive samples. - :math:`F` is the number of positive samples, and each sample has 4 - coordinate values. The data type of :attr:`target_bbox` is float32 or - float64. - - **bbox_inside_weight** (Variable): A 2-D Tensor with shape :math:`[F, 4]` - represents whether a positive sample is fake positive, if a positive - sample is false positive, the corresponding entries in - :attr:`bbox_inside_weight` are set 0, otherwise 1. :math:`F` is the number - of total positive samples in a mini-batch, and each sample has 4 - coordinate values. The data type of :attr:`bbox_inside_weight` is float32 - or float64. - - **fg_num** (Variable): A 2-D Tensor with shape :math:`[N, 1]` represents the number - of positive samples. :math:`N` is the batch size. **Notice: The number - of positive samples is used as the denominator of later loss function, - to avoid the condition that the denominator is zero, this OP has added 1 - to the actual number of positive samples of each image.** The data type of - :attr:`fg_num` is int32. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - bbox_pred = fluid.data(name='bbox_pred', shape=[1, 100, 4], - dtype='float32') - cls_logits = fluid.data(name='cls_logits', shape=[1, 100, 10], - dtype='float32') - anchor_box = fluid.data(name='anchor_box', shape=[100, 4], - dtype='float32') - anchor_var = fluid.data(name='anchor_var', shape=[100, 4], - dtype='float32') - gt_boxes = fluid.data(name='gt_boxes', shape=[10, 4], - dtype='float32') - gt_labels = fluid.data(name='gt_labels', shape=[10, 1], - dtype='int32') - is_crowd = fluid.data(name='is_crowd', shape=[1], - dtype='int32') - im_info = fluid.data(name='im_info', shape=[1, 3], - dtype='float32') - score_pred, loc_pred, score_target, loc_target, bbox_inside_weight, fg_num = \\ - fluid.layers.retinanet_target_assign(bbox_pred, cls_logits, anchor_box, - anchor_var, gt_boxes, gt_labels, is_crowd, im_info, 10) - - """ - - check_variable_and_dtype( - bbox_pred, - 'bbox_pred', - ['float32', 'float64'], - 'retinanet_target_assign', - ) - check_variable_and_dtype( - cls_logits, - 'cls_logits', - ['float32', 'float64'], - 'retinanet_target_assign', - ) - check_variable_and_dtype( - anchor_box, - 'anchor_box', - ['float32', 'float64'], - 'retinanet_target_assign', - ) - check_variable_and_dtype( - anchor_var, - 'anchor_var', - ['float32', 'float64'], - 'retinanet_target_assign', - ) - check_variable_and_dtype( - gt_boxes, 'gt_boxes', ['float32', 'float64'], 'retinanet_target_assign' - ) - check_variable_and_dtype( - gt_labels, 'gt_labels', ['int32'], 'retinanet_target_assign' - ) - check_variable_and_dtype( - is_crowd, 'is_crowd', ['int32'], 'retinanet_target_assign' - ) - check_variable_and_dtype( - im_info, 'im_info', ['float32', 'float64'], 'retinanet_target_assign' - ) - - helper = LayerHelper('retinanet_target_assign', **locals()) - # Assign target label to anchors - loc_index = helper.create_variable_for_type_inference(dtype='int32') - score_index = helper.create_variable_for_type_inference(dtype='int32') - target_label = helper.create_variable_for_type_inference(dtype='int32') - target_bbox = helper.create_variable_for_type_inference( - dtype=anchor_box.dtype - ) - bbox_inside_weight = helper.create_variable_for_type_inference( - dtype=anchor_box.dtype - ) - fg_num = helper.create_variable_for_type_inference(dtype='int32') - helper.append_op( - type="retinanet_target_assign", - inputs={ - 'Anchor': anchor_box, - 'GtBoxes': gt_boxes, - 'GtLabels': gt_labels, - 'IsCrowd': is_crowd, - 'ImInfo': im_info, - }, - outputs={ - 'LocationIndex': loc_index, - 'ScoreIndex': score_index, - 'TargetLabel': target_label, - 'TargetBBox': target_bbox, - 'BBoxInsideWeight': bbox_inside_weight, - 'ForegroundNumber': fg_num, - }, - attrs={ - 'positive_overlap': positive_overlap, - 'negative_overlap': negative_overlap, - }, - ) - - loc_index.stop_gradient = True - score_index.stop_gradient = True - target_label.stop_gradient = True - target_bbox.stop_gradient = True - bbox_inside_weight.stop_gradient = True - fg_num.stop_gradient = True - - cls_logits = paddle.reshape(x=cls_logits, shape=(-1, num_classes)) - bbox_pred = paddle.reshape(x=bbox_pred, shape=(-1, 4)) - predicted_cls_logits = paddle.gather(cls_logits, score_index) - predicted_bbox_pred = paddle.gather(bbox_pred, loc_index) - - return ( - predicted_cls_logits, - predicted_bbox_pred, - target_label, - target_bbox, - bbox_inside_weight, - fg_num, - ) - - -def rpn_target_assign( - bbox_pred, - cls_logits, - anchor_box, - anchor_var, - gt_boxes, - is_crowd, - im_info, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0.0, - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True, -): - """ - **Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection.** - - This layer can be, for given the Intersection-over-Union (IoU) overlap - between anchors and ground truth boxes, to assign classification and - regression targets to each each anchor, these target labels are used for - train RPN. The classification targets is a binary class label (of being - an object or not). Following the paper of Faster-RCNN, the positive labels - are two kinds of anchors: (i) the anchor/anchors with the highest IoU - overlap with a ground-truth box, or (ii) an anchor that has an IoU overlap - higher than rpn_positive_overlap(0.7) with any ground-truth box. Note - that a single ground-truth box may assign positive labels to multiple - anchors. A non-positive anchor is when its IoU ratio is lower than - rpn_negative_overlap (0.3) for all ground-truth boxes. Anchors that are - neither positive nor negative do not contribute to the training objective. - The regression targets are the encoded ground-truth boxes associated with - the positive anchors. - - Args: - bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the - predicted locations of M bounding bboxes. N is the batch size, - and each bounding box has four coordinate values and the layout - is [xmin, ymin, xmax, ymax]. The data type can be float32 or float64. - cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the - predicted confidence predictions. N is the batch size, 1 is the - frontground and background sigmoid, M is number of bounding boxes. - The data type can be float32 or float64. - anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, - each box is represented as [xmin, ymin, xmax, ymax], - [xmin, ymin] is the left top coordinate of the anchor box, - if the input is image feature map, they are close to the origin - of the coordinate system. [xmax, ymax] is the right bottom - coordinate of the anchor box. The data type can be float32 or float64. - anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded - variances of anchors. The data type can be float32 or float64. - gt_boxes (Variable): The ground-truth bounding boxes (bboxes) are a 2D - LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth - bboxes of mini-batch input. The data type can be float32 or float64. - is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd. - The data type must be int32. - im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size, - 3 is the height, width and scale. - rpn_batch_size_per_im(int): Total number of RPN examples per image. - The data type must be int32. - rpn_straddle_thresh(float): Remove RPN anchors that go outside the image - by straddle_thresh pixels. The data type must be float32. - rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled - foreground (i.e. class > 0), 0-th class is background. The data type must be float32. - rpn_positive_overlap(float): Minimum overlap required between an anchor - and ground-truth box for the (anchor, gt box) pair to be a positive - example. The data type must be float32. - rpn_negative_overlap(float): Maximum overlap allowed between an anchor - and ground-truth box for the (anchor, gt box) pair to be a negative - examples. The data type must be float32. - - Returns: - tuple: - A tuple(predicted_scores, predicted_location, target_label, - target_bbox, bbox_inside_weight) is returned. The predicted_scores - and predicted_location is the predicted result of the RPN. - The target_label and target_bbox is the ground truth, - respectively. The predicted_location is a 2D Tensor with shape - [F, 4], and the shape of target_bbox is same as the shape of - the predicted_location, F is the number of the foreground - anchors. The predicted_scores is a 2D Tensor with shape - [F + B, 1], and the shape of target_label is same as the shape - of the predicted_scores, B is the number of the background - anchors, the F and B is depends on the input of this operator. - Bbox_inside_weight represents whether the predicted loc is fake_fg - or not and the shape is [F, 4]. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - bbox_pred = fluid.data(name='bbox_pred', shape=[None, 4], dtype='float32') - cls_logits = fluid.data(name='cls_logits', shape=[None, 1], dtype='float32') - anchor_box = fluid.data(name='anchor_box', shape=[None, 4], dtype='float32') - anchor_var = fluid.data(name='anchor_var', shape=[None, 4], dtype='float32') - gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32') - is_crowd = fluid.data(name='is_crowd', shape=[None], dtype='float32') - im_info = fluid.data(name='im_infoss', shape=[None, 3], dtype='float32') - loc, score, loc_target, score_target, inside_weight = fluid.layers.rpn_target_assign( - bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, is_crowd, im_info) - - """ - - helper = LayerHelper('rpn_target_assign', **locals()) - - check_variable_and_dtype( - bbox_pred, 'bbox_pred', ['float32', 'float64'], 'rpn_target_assign' - ) - check_variable_and_dtype( - cls_logits, 'cls_logits', ['float32', 'float64'], 'rpn_target_assign' - ) - check_variable_and_dtype( - anchor_box, 'anchor_box', ['float32', 'float64'], 'rpn_target_assign' - ) - check_variable_and_dtype( - anchor_var, 'anchor_var', ['float32', 'float64'], 'rpn_target_assign' - ) - check_variable_and_dtype( - gt_boxes, 'gt_boxes', ['float32', 'float64'], 'rpn_target_assign' - ) - check_variable_and_dtype( - is_crowd, 'is_crowd', ['int32'], 'rpn_target_assign' - ) - check_variable_and_dtype( - im_info, 'im_info', ['float32', 'float64'], 'rpn_target_assign' - ) - - # Assign target label to anchors - loc_index = helper.create_variable_for_type_inference(dtype='int32') - score_index = helper.create_variable_for_type_inference(dtype='int32') - target_label = helper.create_variable_for_type_inference(dtype='int32') - target_bbox = helper.create_variable_for_type_inference( - dtype=anchor_box.dtype - ) - bbox_inside_weight = helper.create_variable_for_type_inference( - dtype=anchor_box.dtype - ) - helper.append_op( - type="rpn_target_assign", - inputs={ - 'Anchor': anchor_box, - 'GtBoxes': gt_boxes, - 'IsCrowd': is_crowd, - 'ImInfo': im_info, - }, - outputs={ - 'LocationIndex': loc_index, - 'ScoreIndex': score_index, - 'TargetLabel': target_label, - 'TargetBBox': target_bbox, - 'BBoxInsideWeight': bbox_inside_weight, - }, - attrs={ - 'rpn_batch_size_per_im': rpn_batch_size_per_im, - 'rpn_straddle_thresh': rpn_straddle_thresh, - 'rpn_positive_overlap': rpn_positive_overlap, - 'rpn_negative_overlap': rpn_negative_overlap, - 'rpn_fg_fraction': rpn_fg_fraction, - 'use_random': use_random, - }, - ) - - loc_index.stop_gradient = True - score_index.stop_gradient = True - target_label.stop_gradient = True - target_bbox.stop_gradient = True - bbox_inside_weight.stop_gradient = True - - cls_logits = paddle.reshape(x=cls_logits, shape=(-1, 1)) - bbox_pred = paddle.reshape(x=bbox_pred, shape=(-1, 4)) - predicted_cls_logits = paddle.gather(cls_logits, score_index) - predicted_bbox_pred = paddle.gather(bbox_pred, loc_index) - - return ( - predicted_cls_logits, - predicted_bbox_pred, - target_label, - target_bbox, - bbox_inside_weight, - ) - - def detection_output( loc, scores, @@ -1340,377 +882,6 @@ def bipartite_match( return match_indices, match_distance -def target_assign( - input, - matched_indices, - negative_indices=None, - mismatch_value=None, - name=None, -): - """ - - This operator can be, for given the target bounding boxes or labels, - to assign classification and regression targets to each prediction as well as - weights to prediction. The weights is used to specify which prediction would - not contribute to training loss. - - For each instance, the output `out` and`out_weight` are assigned based on - `match_indices` and `negative_indices`. - Assumed that the row offset for each instance in `input` is called lod, - this operator assigns classification/regression targets by performing the - following steps: - - 1. Assigning all outputs based on `match_indices`: - - .. code-block:: text - - If id = match_indices[i][j] > 0, - - out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] - out_weight[i][j] = 1. - - Otherwise, - - out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} - out_weight[i][j] = 0. - - 2. Assigning outputs based on `neg_indices` if `neg_indices` is provided: - - Assumed that i-th instance in `neg_indices` is called `neg_indice`, - for i-th instance: - - .. code-block:: text - - for id in neg_indice: - out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} - out_weight[i][id] = 1.0 - - Args: - input (Variable): This input is a 3D LoDTensor with shape [M, P, K]. - Data type should be int32 or float32. - matched_indices (Variable): The input matched indices - is 2D Tenosr with shape [N, P], If MatchIndices[i][j] is -1, - the j-th entity of column is not matched to any entity of row in - i-th instance. - negative_indices (Variable, optional): The input negative example indices - are an optional input with shape [Neg, 1] and int32 type, where Neg is - the total number of negative example indices. - mismatch_value (float32, optional): Fill this value to the mismatched - location. - name (string): The default value is None. Normally there is no need for - user to set this property. For more information, please refer - to :ref:`api_guide_Name`. - - Returns: - tuple: A tuple(out, out_weight) is returned. - - out (Variable): a 3D Tensor with shape [N, P, K] and same data type - with `input`, N and P is the same as they are in `matched_indices`, - K is the same as it in input of X. - - out_weight (Variable): the weight for output with the shape of [N, P, 1]. - Data type is float32. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - import paddle - paddle.enable_static() - x = fluid.data( - name='x', - shape=[4, 20, 4], - dtype='float', - lod_level=1) - matched_id = fluid.data( - name='indices', - shape=[8, 20], - dtype='int32') - trg, trg_weight = fluid.layers.target_assign( - x, - matched_id, - mismatch_value=0) - """ - helper = LayerHelper('target_assign', **locals()) - out = helper.create_variable_for_type_inference(dtype=input.dtype) - out_weight = helper.create_variable_for_type_inference(dtype='float32') - helper.append_op( - type='target_assign', - inputs={ - 'X': input, - 'MatchIndices': matched_indices, - 'NegIndices': negative_indices, - }, - outputs={'Out': out, 'OutWeight': out_weight}, - attrs={'mismatch_value': mismatch_value}, - ) - return out, out_weight - - -def ssd_loss( - location, - confidence, - gt_box, - gt_label, - prior_box, - prior_box_var=None, - background_label=0, - overlap_threshold=0.5, - neg_pos_ratio=3.0, - neg_overlap=0.5, - loc_loss_weight=1.0, - conf_loss_weight=1.0, - match_type='per_prediction', - mining_type='max_negative', - normalize=True, - sample_size=None, -): - r""" - :alias_main: paddle.nn.functional.ssd_loss - :alias: paddle.nn.functional.ssd_loss,paddle.nn.functional.loss.ssd_loss - :old_api: paddle.fluid.layers.ssd_loss - - **Multi-box loss layer for object detection algorithm of SSD** - - This layer is to compute detection loss for SSD given the location offset - predictions, confidence predictions, prior boxes and ground-truth bounding - boxes and labels, and the type of hard example mining. The returned loss - is a weighted sum of the localization loss (or regression loss) and - confidence loss (or classification loss) by performing the following steps: - - 1. Find matched bounding box by bipartite matching algorithm. - - 1.1 Compute IOU similarity between ground-truth boxes and prior boxes. - - 1.2 Compute matched bounding box by bipartite matching algorithm. - - 2. Compute confidence for mining hard examples - - 2.1. Get the target label based on matched indices. - - 2.2. Compute confidence loss. - - 3. Apply hard example mining to get the negative example indices and update - the matched indices. - - 4. Assign classification and regression targets - - 4.1. Encoded bbox according to the prior boxes. - - 4.2. Assign regression targets. - - 4.3. Assign classification targets. - - 5. Compute the overall objective loss. - - 5.1 Compute confidence loss. - - 5.2 Compute localization loss. - - 5.3 Compute the overall weighted loss. - - Args: - location (Variable): The location predictions are a 3D Tensor with - shape [N, Np, 4], N is the batch size, Np is total number of - predictions for each instance. 4 is the number of coordinate values, - the layout is [xmin, ymin, xmax, ymax].The data type is float32 or - float64. - confidence (Variable): The confidence predictions are a 3D Tensor - with shape [N, Np, C], N and Np are the same as they are in - `location`, C is the class number.The data type is float32 or - float64. - gt_box (Variable): The ground-truth bounding boxes (bboxes) are a 2D - LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth - bboxes of mini-batch input.The data type is float32 or float64. - gt_label (Variable): The ground-truth labels are a 2D LoDTensor - with shape [Ng, 1].Ng is the total number of ground-truth bboxes of - mini-batch input, 1 is the number of class. The data type is float32 - or float64. - prior_box (Variable): The prior boxes are a 2D Tensor with shape [Np, 4]. - Np and 4 are the same as they are in `location`. The data type is - float32 or float64. - prior_box_var (Variable): The variance of prior boxes are a 2D Tensor - with shape [Np, 4]. Np and 4 are the same as they are in `prior_box` - background_label (int): The index of background label, 0 by default. - overlap_threshold (float): If match_type is 'per_prediction', use - 'overlap_threshold' to determine the extra matching bboxes when finding \ - matched boxes. 0.5 by default. - neg_pos_ratio (float): The ratio of the negative boxes to the positive - boxes, used only when mining_type is 'max_negative', 3.0 by default. - neg_overlap (float): The negative overlap upper bound for the unmatched - predictions. Use only when mining_type is 'max_negative', - 0.5 by default. - loc_loss_weight (float): Weight for localization loss, 1.0 by default. - conf_loss_weight (float): Weight for confidence loss, 1.0 by default. - match_type (str): The type of matching method during training, should - be 'bipartite' or 'per_prediction', 'per_prediction' by default. - mining_type (str): The hard example mining type, should be 'hard_example' - or 'max_negative', now only support `max_negative`. - normalize (bool): Whether to normalize the SSD loss by the total number - of output locations, True by default. - sample_size (int): The max sample size of negative box, used only when - mining_type is 'hard_example'. - - Returns: - Variable(Tensor): The weighted sum of the localization loss and confidence loss, \ - with shape [N * Np, 1], N and Np are the same as they are in - `location`.The data type is float32 or float64. - - Raises: - ValueError: If mining_type is 'hard_example', now only support mining \ - type of `max_negative`. - - Examples: - - .. code-block:: python - - import paddle.fluid as fluid - pb = fluid.data( - name='prior_box', - shape=[10, 4], - dtype='float32') - pbv = fluid.data( - name='prior_box_var', - shape=[10, 4], - dtype='float32') - loc = fluid.data(name='target_box', shape=[10, 4], dtype='float32') - scores = fluid.data(name='scores', shape=[10, 21], dtype='float32') - gt_box = fluid.data( - name='gt_box', shape=[4], lod_level=1, dtype='float32') - gt_label = fluid.data( - name='gt_label', shape=[1], lod_level=1, dtype='float32') - loss = fluid.layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv) - """ - - helper = LayerHelper('ssd_loss', **locals()) - if mining_type != 'max_negative': - raise ValueError("Only support mining_type == max_negative now.") - - num, num_prior, num_class = confidence.shape - conf_shape = paddle.shape(confidence) - - def __reshape_to_2d(var): - out = paddle.flatten(var, 2, -1) - out = paddle.flatten(out, 0, 1) - return out - - # 1. Find matched bounding box by prior box. - # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes. - iou = iou_similarity(x=gt_box, y=prior_box) - # 1.2 Compute matched bounding box by bipartite matching algorithm. - matched_indices, matched_dist = bipartite_match( - iou, match_type, overlap_threshold - ) - - # 2. Compute confidence for mining hard examples - # 2.1. Get the target label based on matched indices - gt_label = paddle.reshape( - x=gt_label, shape=(len(gt_label.shape) - 1) * (0,) + (-1, 1) - ) - gt_label.stop_gradient = True - target_label, _ = target_assign( - gt_label, matched_indices, mismatch_value=background_label - ) - # 2.2. Compute confidence loss. - # Reshape confidence to 2D tensor. - confidence = __reshape_to_2d(confidence) - target_label = tensor.cast(x=target_label, dtype='int64') - target_label = __reshape_to_2d(target_label) - target_label.stop_gradient = True - conf_loss = softmax_with_cross_entropy(confidence, target_label) - # 3. Mining hard examples - actual_shape = paddle.slice(conf_shape, axes=[0], starts=[0], ends=[2]) - actual_shape.stop_gradient = True - # shape=(-1, 0) is set for compile-time, the correct shape is set by - # actual_shape in runtime. - conf_loss = paddle.reshape(x=conf_loss, shape=actual_shape) - conf_loss.stop_gradient = True - neg_indices = helper.create_variable_for_type_inference(dtype='int32') - dtype = matched_indices.dtype - updated_matched_indices = helper.create_variable_for_type_inference( - dtype=dtype - ) - helper.append_op( - type='mine_hard_examples', - inputs={ - 'ClsLoss': conf_loss, - 'LocLoss': None, - 'MatchIndices': matched_indices, - 'MatchDist': matched_dist, - }, - outputs={ - 'NegIndices': neg_indices, - 'UpdatedMatchIndices': updated_matched_indices, - }, - attrs={ - 'neg_pos_ratio': neg_pos_ratio, - 'neg_dist_threshold': neg_overlap, - 'mining_type': mining_type, - 'sample_size': sample_size, - }, - ) - - # 4. Assign classification and regression targets - # 4.1. Encoded bbox according to the prior boxes. - encoded_bbox = box_coder( - prior_box=prior_box, - prior_box_var=prior_box_var, - target_box=gt_box, - code_type='encode_center_size', - ) - # 4.2. Assign regression targets - target_bbox, target_loc_weight = target_assign( - encoded_bbox, updated_matched_indices, mismatch_value=background_label - ) - # 4.3. Assign classification targets - target_label, target_conf_weight = target_assign( - gt_label, - updated_matched_indices, - negative_indices=neg_indices, - mismatch_value=background_label, - ) - - # 5. Compute loss. - # 5.1 Compute confidence loss. - target_label = __reshape_to_2d(target_label) - target_label = tensor.cast(x=target_label, dtype='int64') - - conf_loss = softmax_with_cross_entropy(confidence, target_label) - target_conf_weight = __reshape_to_2d(target_conf_weight) - conf_loss = conf_loss * target_conf_weight - - # the target_label and target_conf_weight do not have gradient. - target_label.stop_gradient = True - target_conf_weight.stop_gradient = True - - # 5.2 Compute regression loss. - location = __reshape_to_2d(location) - target_bbox = __reshape_to_2d(target_bbox) - - smooth_l1_loss = paddle.nn.loss.SmoothL1Loss() - loc_loss = smooth_l1_loss(location, target_bbox) - target_loc_weight = __reshape_to_2d(target_loc_weight) - loc_loss = loc_loss * target_loc_weight - - # the target_bbox and target_loc_weight do not have gradient. - target_bbox.stop_gradient = True - target_loc_weight.stop_gradient = True - - # 5.3 Compute overall weighted loss. - loss = conf_loss_weight * conf_loss + loc_loss_weight * loc_loss - # reshape to [N, Np], N is the batch size and Np is the prior box number. - # shape=(-1, 0) is set for compile-time, the correct shape is set by - # actual_shape in runtime. - loss = paddle.reshape(x=loss, shape=actual_shape) - loss = paddle.sum(loss, axis=1, keepdim=True) - if normalize: - normalizer = paddle.sum(target_loc_weight) - loss = loss / normalizer - - return loss - - def prior_box( input, image, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 7fd3bc2e8b..cf2523947f 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -163,74 +163,6 @@ class TestDetection(unittest.TestCase): code_type='encode_center_size', ) - def test_detection_api(self): - program = Program() - with program_guard(program): - x = layers.data(name='x', shape=[4], dtype='float32') - y = layers.data(name='y', shape=[4], dtype='float32') - z = layers.data(name='z', shape=[4], dtype='float32', lod_level=1) - iou = layers.iou_similarity(x=x, y=y) - bcoder = layers.box_coder( - prior_box=x, - prior_box_var=y, - target_box=z, - code_type='encode_center_size', - ) - self.assertIsNotNone(iou) - self.assertIsNotNone(bcoder) - - matched_indices, matched_dist = layers.bipartite_match(iou) - self.assertIsNotNone(matched_indices) - self.assertIsNotNone(matched_dist) - - gt = layers.data( - name='gt', shape=[1, 1], dtype='int32', lod_level=1 - ) - trg, trg_weight = layers.target_assign( - gt, matched_indices, mismatch_value=0 - ) - self.assertIsNotNone(trg) - self.assertIsNotNone(trg_weight) - - gt2 = layers.data( - name='gt2', shape=[10, 4], dtype='float32', lod_level=1 - ) - trg, trg_weight = layers.target_assign( - gt2, matched_indices, mismatch_value=0 - ) - self.assertIsNotNone(trg) - self.assertIsNotNone(trg_weight) - - print(str(program)) - - def test_ssd_loss(self): - program = Program() - with program_guard(program): - pb = layers.data( - name='prior_box', - shape=[10, 4], - append_batch_size=False, - dtype='float32', - ) - pbv = layers.data( - name='prior_box_var', - shape=[10, 4], - append_batch_size=False, - dtype='float32', - ) - loc = layers.data(name='target_box', shape=[10, 4], dtype='float32') - scores = layers.data(name='scores', shape=[10, 21], dtype='float32') - gt_box = layers.data( - name='gt_box', shape=[4], lod_level=1, dtype='float32' - ) - gt_label = layers.data( - name='gt_label', shape=[1], lod_level=1, dtype='int32' - ) - loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv) - self.assertIsNotNone(loss) - self.assertEqual(loss.shape[-1], 1) - print(str(program)) - class TestPriorBox(unittest.TestCase): def test_prior_box(self): @@ -521,87 +453,6 @@ class TestDetectionMAP(unittest.TestCase): print(str(program)) -class TestRpnTargetAssign(unittest.TestCase): - def test_rpn_target_assign(self): - program = Program() - with program_guard(program): - bbox_pred_shape = [10, 50, 4] - cls_logits_shape = [10, 50, 2] - anchor_shape = [50, 4] - - bbox_pred = layers.data( - name='bbox_pred', - shape=bbox_pred_shape, - append_batch_size=False, - dtype='float32', - ) - cls_logits = layers.data( - name='cls_logits', - shape=cls_logits_shape, - append_batch_size=False, - dtype='float32', - ) - anchor_box = layers.data( - name='anchor_box', - shape=anchor_shape, - append_batch_size=False, - dtype='float32', - ) - anchor_var = layers.data( - name='anchor_var', - shape=anchor_shape, - append_batch_size=False, - dtype='float32', - ) - gt_boxes = layers.data( - name='gt_boxes', shape=[4], lod_level=1, dtype='float32' - ) - is_crowd = layers.data( - name='is_crowd', - shape=[1, 10], - dtype='int32', - lod_level=1, - append_batch_size=False, - ) - im_info = layers.data( - name='im_info', - shape=[1, 3], - dtype='float32', - lod_level=1, - append_batch_size=False, - ) - outs = layers.rpn_target_assign( - bbox_pred=bbox_pred, - cls_logits=cls_logits, - anchor_box=anchor_box, - anchor_var=anchor_var, - gt_boxes=gt_boxes, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0.0, - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=False, - ) - pred_scores = outs[0] - pred_loc = outs[1] - tgt_lbl = outs[2] - tgt_bbox = outs[3] - bbox_inside_weight = outs[4] - - self.assertIsNotNone(pred_scores) - self.assertIsNotNone(pred_loc) - self.assertIsNotNone(tgt_lbl) - self.assertIsNotNone(tgt_bbox) - self.assertIsNotNone(bbox_inside_weight) - assert pred_scores.shape[1] == 1 - assert pred_loc.shape[1] == 4 - assert pred_loc.shape[1] == tgt_bbox.shape[1] - print(str(program)) - - class TestGenerateProposals(LayerTest): def test_generate_proposals(self): scores_np = np.random.rand(2, 3, 4, 4).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index dcf9d4d100..2258e3807c 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -3288,70 +3288,6 @@ class TestBook(LayerTest): ) return out - def test_retinanet_target_assign(self): - with program_guard( - fluid.default_main_program(), fluid.default_startup_program() - ): - bbox_pred = layers.data( - name='bbox_pred', - shape=[1, 100, 4], - append_batch_size=False, - dtype='float32', - ) - cls_logits = layers.data( - name='cls_logits', - shape=[1, 100, 10], - append_batch_size=False, - dtype='float32', - ) - anchor_box = layers.data( - name='anchor_box', - shape=[100, 4], - append_batch_size=False, - dtype='float32', - ) - anchor_var = layers.data( - name='anchor_var', - shape=[100, 4], - append_batch_size=False, - dtype='float32', - ) - gt_boxes = layers.data( - name='gt_boxes', - shape=[10, 4], - append_batch_size=False, - dtype='float32', - ) - gt_labels = layers.data( - name='gt_labels', - shape=[10, 1], - append_batch_size=False, - dtype='int32', - ) - is_crowd = layers.data( - name='is_crowd', - shape=[1], - append_batch_size=False, - dtype='int32', - ) - im_info = layers.data( - name='im_info', - shape=[1, 3], - append_batch_size=False, - dtype='float32', - ) - return layers.retinanet_target_assign( - bbox_pred, - cls_logits, - anchor_box, - anchor_var, - gt_boxes, - gt_labels, - is_crowd, - im_info, - 10, - ) - def test_addmm(self): with program_guard( fluid.default_main_program(), fluid.default_startup_program() diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py index 98cad29ac2..d0147d8b70 100644 --- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py @@ -23,9 +23,6 @@ from test_generate_proposal_labels_op import ( _generate_groundtruth, ) -import paddle.fluid as fluid -from paddle.fluid import Program, program_guard - def rpn_target_assign( anchor_by_gt_overlap, @@ -485,424 +482,5 @@ class TestRetinanetTargetAssignOp(OpTest): self.check_output() -class TestRetinanetTargetAssignOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - bbox_pred1 = fluid.data( - name='bbox_pred1', shape=[1, 100, 4], dtype='float32' - ) - cls_logits1 = fluid.data( - name='cls_logits1', shape=[1, 100, 10], dtype='float32' - ) - anchor_box1 = fluid.data( - name='anchor_box1', shape=[100, 4], dtype='float32' - ) - anchor_var1 = fluid.data( - name='anchor_var1', shape=[100, 4], dtype='float32' - ) - gt_boxes1 = fluid.data( - name='gt_boxes1', shape=[10, 4], dtype='float32' - ) - gt_labels1 = fluid.data( - name='gt_labels1', shape=[10, 1], dtype='int32' - ) - is_crowd1 = fluid.data(name='is_crowd1', shape=[1], dtype='float32') - im_info1 = fluid.data( - name='im_info1', shape=[1, 3], dtype='float32' - ) - - # The `bbox_pred` must be Variable and the data type of `bbox_pred` Tensor - # one of float32 and float64. - def test_bbox_pred_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - [1], - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_bbox_pred_type) - - def test_bbox_pred_tensor_dtype(): - bbox_pred2 = fluid.data( - name='bbox_pred2', shape=[1, 100, 4], dtype='intt32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred2, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_bbox_pred_tensor_dtype) - - # The `cls_logits` must be Variable and the data type of `cls_logits` Tensor - # one of float32 and float64. - def test_cls_logits_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - 2, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_cls_logits_type) - - def test_cls_logits_tensor_dtype(): - cls_logits2 = fluid.data( - name='cls_logits2', shape=[1, 100, 10], dtype='int32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits2, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_cls_logits_tensor_dtype) - - # The `anchor_box` must be Variable and the data type of `anchor_box` Tensor - # one of float32 and float64. - def test_anchor_box_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - [5], - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_anchor_box_type) - - def test_anchor_box_tensor_dtype(): - anchor_box2 = fluid.data( - name='anchor_box2', shape=[100, 4], dtype='int32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box2, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_anchor_box_tensor_dtype) - - # The `anchor_var` must be Variable and the data type of `anchor_var` Tensor - # one of float32 and float64. - def test_anchor_var_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - 5, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_anchor_var_type) - - def test_anchor_var_tensor_dtype(): - anchor_var2 = fluid.data( - name='anchor_var2', shape=[100, 4], dtype='int32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var2, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_anchor_var_tensor_dtype) - - # The `gt_boxes` must be Variable and the data type of `gt_boxes` Tensor - # one of float32 and float64. - def test_gt_boxes_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - [4], - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_gt_boxes_type) - - def test_gt_boxes_tensor_dtype(): - gt_boxes2 = fluid.data( - name='gt_boxes2', shape=[10, 4], dtype='int32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes2, - gt_labels1, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_gt_boxes_tensor_dtype) - - # The `gt_label` must be Variable and the data type of `gt_label` Tensor - # int32. - def test_gt_label_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - 9, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_gt_label_type) - - def test_gt_label_tensor_dtype(): - gt_labels2 = fluid.data( - name='label2', shape=[10, 1], dtype='float32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels2, - is_crowd1, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_gt_label_tensor_dtype) - - # The `is_crowd` must be Variable and the data type of `is_crowd` Tensor - # int32. - def test_is_crowd_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - [10], - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_is_crowd_type) - - def test_is_crowd_tensor_dtype(): - is_crowd2 = fluid.data( - name='is_crowd2', shape=[10, 1], dtype='float32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd2, - im_info1, - 10, - ) - - self.assertRaises(TypeError, test_is_crowd_tensor_dtype) - - # The `im_info` must be Variable and the data type of `im_info` Tensor - # must be one of float32 and float64. - def test_im_info_type(): - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - 1, - 10, - ) - - self.assertRaises(TypeError, test_im_info_type) - - def test_im_info_tensor_dtype(): - im_info2 = fluid.data( - name='im_info2', shape=[1, 3], dtype='int32' - ) - ( - score_pred, - loc_pred, - score_target, - loc_target, - bbox_inside_weight, - fg_num, - ) = fluid.layers.retinanet_target_assign( - bbox_pred1, - cls_logits1, - anchor_box1, - anchor_var1, - gt_boxes1, - gt_labels1, - is_crowd1, - im_info2, - 10, - ) - - self.assertRaises(TypeError, test_im_info_tensor_dtype) - - if __name__ == '__main__': unittest.main() -- GitLab