Add DIOU nms (#160)

* Add DIoU NMS.
* Fix typo in DIoU NMS and soft NMS (`lod_leval` -> `lod_level`).

Add DIOU nms (#160)
* Add DIoU NMS.
* Fix typo in DIoU NMS and soft NMS (`lod_leval` -> `lod_level`).
759b2dfc · littletomatodonkey · GitHub · 1e6090d8 · 759b2dfc · 759b2dfc
4 changed files
--- a/configs/iou_loss/README.md
+++ b/configs/iou_loss/README.md
@@ -45,4 +45,4 @@
 | :---------------------- | :------------- | :---: | :---: | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: |
 | ResNet50-vd-FPN            | Faster         | GIOU |   10   |    2     |   1x    |     22.94     |  39.4  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_giou_loss_1x.tar) |
 | ResNet50-vd-FPN            | Faster         | DIOU |   12   |    2     |   1x    |     22.94     |  39.2  |    -    | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_diou_loss_1x.tar) |
-| ResNet50-vd-FPN            | Faster         | CIOU |   12   |    2     |   1x    |     22.95     |  39.5  |   -   | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_ciou_loss_1x.tar) |
+| ResNet50-vd-FPN            | Faster         | CIOU |   12   |    2     |   1x    |     22.95     |  39.6  |   -   | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_ciou_loss_1x.tar) |
--- a/configs/iou_loss/faster_rcnn_r50_vd_fpn_ciou_loss_1x.yml
+++ b/configs/iou_loss/faster_rcnn_r50_vd_fpn_ciou_loss_1x.yml
@@ -75,11 +75,13 @@ BBoxAssigner:
 BBoxHead:
  head: TwoFCHead
-  nms:
+  nms: MultiClassDiouNMS
+  bbox_loss: DiouLoss
+MultiClassDiouNMS:
  keep_top_k: 100
  nms_threshold: 0.5
  score_threshold: 0.05
-  bbox_loss: DiouLoss
 DiouLoss:
  loss_weight: 10.0

--- a/ppdet/modeling/losses/diou_loss.py
+++ b/ppdet/modeling/losses/diou_loss.py
@@ -90,19 +90,21 @@ class DiouLoss(GiouLoss):
                                                        ) - intsctk + eps
        iouk = intsctk / unionk
+        # DIOU term
+        dist_intersection = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg)
+        dist_union = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
+        diou_term = (dist_intersection + eps) / (dist_union + eps)
+        # CIOU term
        ciou_term = 0
        if self.use_complete_iou_loss:
-            dist_intersection = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg
-                                                                        )
-            dist_union = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
-            d = (dist_intersection + eps) / (dist_union + eps)
            ar_gt = wg / hg
            ar_pred = w / h
            arctan = fluid.layers.atan(ar_gt) - fluid.layers.atan(ar_pred)
            ar_loss = 4. / np.pi / np.pi * arctan * arctan
            alpha = ar_loss / (1 - iouk + ar_loss + eps)
            alpha.stop_gradient = True
-            ciou_term = d + alpha * ar_loss
+            ciou_term = alpha * ar_loss
        iou_weights = 1
        if inside_weight is not None and outside_weight is not None:
@@ -116,6 +118,6 @@ class DiouLoss(GiouLoss):
        class_weight = 2 if self.is_cls_agnostic else self.num_classes
        diou = fluid.layers.reduce_mean(
-            (1 - iouk + ciou_term) * iou_weights) * class_weight
+            (1 - iouk + ciou_term + diou_term) * iou_weights) * class_weight
        return diou * self.loss_weight
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -24,7 +24,7 @@ __all__ = [
    'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'MultiClassNMS',
    'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool', 'MultiBoxHead',
    'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm',
-    'MultiClassSoftNMS'
+    'MultiClassSoftNMS', 'MultiClassDiouNMS'
 ]
@@ -226,9 +226,9 @@ class MultiClassSoftNMS(object):
        self.background_label = background_label
    def __call__(self, bboxes, scores):
-        def create_tmp_var(program, name, dtype, shape, lod_leval):
+        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
-                name=name, dtype=dtype, shape=shape, lod_leval=lod_leval)
+                name=name, dtype=dtype, shape=shape, lod_level=lod_level)
        def _soft_nms_for_cls(dets, sigma, thres):
            """soft_nms_for_cls"""
@@ -313,12 +313,159 @@ class MultiClassSoftNMS(object):
            name='softnms_pred_result',
            dtype='float32',
            shape=[6],
-            lod_leval=1)
+            lod_level=1)
        fluid.layers.py_func(
            func=_soft_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
+@register
+@serializable
+class MultiClassDiouNMS(object):
+    """Multi-class NMS whose suppression test is ``IoU - DIoU term``.
+
+    The suppression runs in numpy on the host and is attached to the
+    program through ``fluid.layers.py_func``.
+
+    Args:
+        score_threshold (float): per-class scores below this value are
+            discarded before NMS.
+        keep_top_k (int): target number of detections kept over all classes.
+        nms_threshold (float): a candidate is suppressed when its
+            ``IoU - DIoU term`` against an already kept box exceeds this.
+        normalized (bool): when False, 1 is added to box widths and heights
+            in the area and overlap computations.
+        background_label (int): when 0, class column 0 is skipped.
+    """
+    def __init__(
+            self,
+            score_threshold=0.05,
+            keep_top_k=100,
+            nms_threshold=0.5,
+            normalized=False,
+            background_label=0, ):
+        super(MultiClassDiouNMS, self).__init__()
+        self.score_threshold = score_threshold
+        self.nms_threshold = nms_threshold
+        self.keep_top_k = keep_top_k
+        self.normalized = normalized
+        self.background_label = background_label
+    def __call__(self, bboxes, scores):
+        """Build the py_func op and return its output variable.
+
+        Args:
+            bboxes: box input, indexed as ``bboxes[roi, class, :]`` by the
+                numpy implementation.
+            scores: score input, indexed as ``scores[roi, class]``.
+
+        Returns:
+            The variable created for the op output (float32, lod_level 1).
+            Each row is ``[class_id, score, *box]`` in the order the numpy
+            implementation stacks them.
+        """
+        def create_tmp_var(program, name, dtype, shape, lod_level):
+            return program.current_block().create_var(
+                name=name, dtype=dtype, shape=shape, lod_level=lod_level)
+        def _calc_diou_term(dets1, dets2):
+            """Return squared centre distance over squared enclosing diagonal.
+
+            ``dets1`` and ``dets2`` are ``[x1, y1, x2, y2]`` sequences; the
+            entries of ``dets2`` may be arrays, in which case the result is
+            computed element-wise.
+            """
+            eps = 1.e-10
+            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
+            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]
+            # Centres are taken from the raw corners, before the clamp below.
+            cx = (x1 + x2) / 2
+            cy = (y1 + y2) / 2
+            cxg = (x1g + x2g) / 2
+            cyg = (y1g + y2g) / 2
+            # Guard against inverted corners on the reference box.
+            x2 = np.maximum(x1, x2)
+            y2 = np.maximum(y1, y2)
+            # Smallest box enclosing both inputs.
+            xc1 = np.minimum(x1, x1g)
+            yc1 = np.minimum(y1, y1g)
+            xc2 = np.maximum(x2, x2g)
+            yc2 = np.maximum(y2, y2g)
+            # DIOU term
+            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
+            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
+            # eps keeps the ratio finite when the enclosing box is degenerate.
+            diou_term = (dist_intersection + eps) / (dist_union + eps)
+            return diou_term
+        def _diou_nms_for_cls(dets, thres):
+            """Run greedy DIoU-NMS on one class.
+
+            ``dets`` rows are ``[score, x1, y1, x2, y2]`` and are processed
+            in the order given; the caller sorts them by descending score.
+            Returns the kept rows.
+            """
+            x1 = dets[:, 1]
+            y1 = dets[:, 2]
+            x2 = dets[:, 3]
+            y2 = dets[:, 4]
+            eta = 0 if self.normalized else 1
+            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
+            order = np.arange(dets.shape[0])
+            keep = []
+            while order.size > 0:
+                i = order[0]
+                keep.append(i)
+                rest = order[1:]
+                xx1 = np.maximum(x1[i], x1[rest])
+                yy1 = np.maximum(y1[i], y1[rest])
+                xx2 = np.minimum(x2[i], x2[rest])
+                yy2 = np.minimum(y2[i], y2[rest])
+                w = np.maximum(0.0, xx2 - xx1 + eta)
+                h = np.maximum(0.0, yy2 - yy1 + eta)
+                inter = w * h
+                ovr = inter / (areas[i] + areas[rest] - inter)
+                diou_term = _calc_diou_term(
+                    [x1[i], y1[i], x2[i], y2[i]],
+                    [x1[rest], y1[rest], x2[rest], y2[rest]])
+                # Survivors are the candidates not suppressed by box i.
+                inds = np.where(ovr - diou_term <= thres)[0]
+                # inds indexes `rest`, which starts one past order[0].
+                order = order[inds + 1]
+            dets_final = dets[keep]
+            return dets_final
+        def _diou_nms(bboxes, scores):
+            """Numpy body of the op: per-class DIoU-NMS, then a global cap."""
+            bboxes = np.array(bboxes)
+            scores = np.array(scores)
+            class_nums = scores.shape[-1]
+            score_threshold = self.score_threshold
+            nms_threshold = self.nms_threshold
+            keep_top_k = self.keep_top_k
+            cls_boxes = [[] for _ in range(class_nums)]
+            cls_ids = [[] for _ in range(class_nums)]
+            start_idx = 1 if self.background_label == 0 else 0
+            for j in range(start_idx, class_nums):
+                inds = np.where(scores[:, j] >= score_threshold)[0]
+                scores_j = scores[inds, j]
+                rois_j = bboxes[inds, j, :]
+                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
+                    np.float32, copy=False)
+                # Highest score first, as the greedy NMS loop expects.
+                cls_rank = np.argsort(-dets_j[:, 0])
+                dets_j = dets_j[cls_rank]
+                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
+                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
+                                                                           1)
+            cls_boxes = np.vstack(cls_boxes[start_idx:])
+            cls_ids = np.vstack(cls_ids[start_idx:])
+            # Stacking integer ids with float32 boxes promotes to float64;
+            # cast back so the tensor matches the float32 output variable
+            # (and the float32 placeholder used for the empty case below).
+            pred_result = np.hstack([cls_ids, cls_boxes]).astype(
+                np.float32, copy=False)
+            # Limit to max_per_image detections **over all classes**
+            # (rows tied with the cut-off score are all kept).
+            image_scores = cls_boxes[:, 0]
+            if len(image_scores) > keep_top_k:
+                image_thresh = np.sort(image_scores)[-keep_top_k]
+                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
+                pred_result = pred_result[keep, :]
+            res = fluid.LoDTensor()
+            res.set_lod([[0, pred_result.shape[0]]])
+            if pred_result.shape[0] == 0:
+                # The LoD above already records zero detections; the tensor
+                # data is replaced by a one-element float32 placeholder.
+                pred_result = np.array([[1]], dtype=np.float32)
+            res.set(pred_result, fluid.CPUPlace())
+            return res
+        pred_result = create_tmp_var(
+            fluid.default_main_program(),
+            name='diou_nms_pred_result',
+            dtype='float32',
+            shape=[6],
+            lod_level=1)
+        fluid.layers.py_func(
+            func=_diou_nms, x=[bboxes, scores], out=pred_result)
+        return pred_result
 @register
 class BBoxAssigner(object):
    __op__ = fluid.layers.generate_proposal_labels