From bf700a8e700b69149c65d849cf942ba2959b19ab Mon Sep 17 00:00:00 2001
From: Kaipeng Deng <dengkaipeng@baidu.com>
Date: Sat, 30 Jan 2021 20:32:56 +0800
Subject: [PATCH] fix SSD: move background class to the last class index (#2145)

---
 dygraph/ppdet/engine/trainer.py             |  1 +
 dygraph/ppdet/metrics/map_utils.py          | 14 +++++++-------
 dygraph/ppdet/metrics/metrics.py            |  7 ++++++-
 dygraph/ppdet/modeling/architectures/ssd.py | 12 +++++++++++-
 dygraph/ppdet/modeling/heads/ssd_head.py    | 13 +++++++------
 dygraph/ppdet/modeling/losses/ssd_loss.py   | 11 ++++++++---
 6 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/dygraph/ppdet/engine/trainer.py b/dygraph/ppdet/engine/trainer.py
index 20ea16b7a..443e56451 100644
--- a/dygraph/ppdet/engine/trainer.py
+++ b/dygraph/ppdet/engine/trainer.py
@@ -165,6 +165,7 @@ class Trainer(object):
         if not self._weights_loaded:
             self.load_weights(self.cfg.pretrain_weights)
 
+        model = self.model
         if self._nranks > 1:
             model = paddle.DataParallel(self.model)
         else:
diff --git a/dygraph/ppdet/metrics/map_utils.py b/dygraph/ppdet/metrics/map_utils.py
index 51d06bb26..620200be9 100644
--- a/dygraph/ppdet/metrics/map_utils.py
+++ b/dygraph/ppdet/metrics/map_utils.py
@@ -102,7 +102,7 @@ class DetectionMAP(object):
         self.evaluate_difficult = evaluate_difficult
         self.reset()
 
-    def update(self, bbox, gt_box, gt_label, difficult=None):
+    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
         """
         Update metric statics from given prediction and ground
         truth infomations.
@@ -117,13 +117,13 @@ class DetectionMAP(object):
 
         # record class score positive
         visited = [False] * len(gt_label)
-        for b in bbox:
-            label, score, xmin, ymin, xmax, ymax = b.tolist()
+        for b, s, l in zip(bbox, score, label):
+            xmin, ymin, xmax, ymax = b.tolist()
             pred = [xmin, ymin, xmax, ymax]
             max_idx = -1
             max_overlap = -1.0
             for i, gl in enumerate(gt_label):
-                if int(gl) == int(label):
+                if int(gl) == int(l):
                     overlap = jaccard_overlap(pred, gt_box[i],
                                               self.is_bbox_normalized)
                     if overlap > max_overlap:
@@ -134,12 +134,12 @@ class DetectionMAP(object):
                 if self.evaluate_difficult or \
                         int(np.array(difficult[max_idx])) == 0:
                     if not visited[max_idx]:
-                        self.class_score_poss[int(label)].append([score, 1.0])
+                        self.class_score_poss[int(l)].append([s, 1.0])
                         visited[max_idx] = True
                     else:
-                        self.class_score_poss[int(label)].append([score, 0.0])
+                        self.class_score_poss[int(l)].append([s, 0.0])
             else:
-                self.class_score_poss[int(label)].append([score, 0.0])
+                self.class_score_poss[int(l)].append([s, 0.0])
 
     def reset(self):
         """
diff --git a/dygraph/ppdet/metrics/metrics.py b/dygraph/ppdet/metrics/metrics.py
index 644658cf4..6647b8382 100644
--- a/dygraph/ppdet/metrics/metrics.py
+++ b/dygraph/ppdet/metrics/metrics.py
@@ -148,6 +148,8 @@ class VOCMetric(Metric):
 
     def update(self, inputs, outputs):
         bboxes = outputs['bbox'].numpy()
+        scores = outputs['score'].numpy()
+        labels = outputs['label'].numpy()
         bbox_lengths = outputs['bbox_num'].numpy()
 
         if bboxes.shape == (1, 1) or bboxes is None:
@@ -171,9 +173,12 @@ class VOCMetric(Metric):
                             else difficults[i]
             bbox_num = bbox_lengths[i]
             bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
+            score = scores[bbox_idx:bbox_idx + bbox_num]
+            label = labels[bbox_idx:bbox_idx + bbox_num]
             gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
                                                              difficult)
-            self.detection_map.update(bbox, gt_box, gt_label, difficult)
+            self.detection_map.update(bbox, score, label, gt_box, gt_label,
+                                      difficult)
             bbox_idx += bbox_num
 
     def accumulate(self):
diff --git a/dygraph/ppdet/modeling/architectures/ssd.py b/dygraph/ppdet/modeling/architectures/ssd.py
index 55ff07efe..4d195191f 100644
--- a/dygraph/ppdet/modeling/architectures/ssd.py
+++ b/dygraph/ppdet/modeling/architectures/ssd.py
@@ -54,4 +54,14 @@ class SSD(BaseArch):
         return {"loss": self._forward()}
 
     def get_pred(self):
-        return dict(zip(['bbox', 'bbox_num'], self._forward()))
+        bbox_pred, bbox_num = self._forward()
+        label = bbox_pred[:, 0]
+        score = bbox_pred[:, 1]
+        bbox = bbox_pred[:, 2:]
+        output = {
+            'bbox': bbox,
+            'score': score,
+            'label': label,
+            'bbox_num': bbox_num
+        }
+        return output
diff --git a/dygraph/ppdet/modeling/heads/ssd_head.py b/dygraph/ppdet/modeling/heads/ssd_head.py
index fb004c498..3ad8259e9 100644
--- a/dygraph/ppdet/modeling/heads/ssd_head.py
+++ b/dygraph/ppdet/modeling/heads/ssd_head.py
@@ -58,7 +58,7 @@ class SSDHead(nn.Layer):
     __inject__ = ['anchor_generator', 'loss']
 
     def __init__(self,
-                 num_classes=81,
+                 num_classes=80,
                  in_channels=(512, 1024, 512, 256, 256, 256),
                  anchor_generator=AnchorGeneratorSSD().__dict__,
                  kernel_size=3,
@@ -67,7 +67,8 @@ class SSDHead(nn.Layer):
                  conv_decay=0.,
                  loss='SSDLoss'):
         super(SSDHead, self).__init__()
-        self.num_classes = num_classes
+        # add background class
+        self.num_classes = num_classes + 1
         self.in_channels = in_channels
         self.anchor_generator = anchor_generator
         self.loss = loss
@@ -106,7 +107,7 @@ class SSDHead(nn.Layer):
                     score_conv_name,
                     nn.Conv2D(
                         in_channels=in_channels[i],
-                        out_channels=num_prior * num_classes,
+                        out_channels=num_prior * self.num_classes,
                         kernel_size=kernel_size,
                         padding=padding))
             else:
@@ -114,7 +115,7 @@ class SSDHead(nn.Layer):
                     score_conv_name,
                     SepConvLayer(
                         in_channels=in_channels[i],
-                        out_channels=num_prior * num_classes,
+                        out_channels=num_prior * self.num_classes,
                         kernel_size=kernel_size,
                         padding=padding,
                         conv_decay=conv_decay,
@@ -129,8 +130,8 @@ class SSDHead(nn.Layer):
         box_preds = []
         cls_scores = []
         prior_boxes = []
-        for feat, box_conv, score_conv in zip(feats, self.box_convs,
-                                              self.score_convs):
+        for i, (feat, box_conv, score_conv
+                ) in enumerate(zip(feats, self.box_convs, self.score_convs)):
             box_pred = box_conv(feat)
             box_pred = paddle.transpose(box_pred, [0, 2, 3, 1])
             box_pred = paddle.reshape(box_pred, [0, -1, 4])
diff --git a/dygraph/ppdet/modeling/losses/ssd_loss.py b/dygraph/ppdet/modeling/losses/ssd_loss.py
index 8561a83cb..04ba75b64 100644
--- a/dygraph/ppdet/modeling/losses/ssd_loss.py
+++ b/dygraph/ppdet/modeling/losses/ssd_loss.py
@@ -114,7 +114,8 @@ class SSDLoss(nn.Layer):
         scores = paddle.concat(scores, axis=1)
         prior_boxes = paddle.concat(anchors, axis=0)
         gt_label = gt_class.unsqueeze(-1)
-        batch_size, num_priors, num_classes = scores.shape
+        batch_size, num_priors = scores.shape[:2]
+        num_classes = scores.shape[-1] - 1
 
         def _reshape_to_2d(x):
             return paddle.flatten(x, start_axis=2)
@@ -137,7 +138,8 @@ class SSDLoss(nn.Layer):
 
         # 2. Compute confidence for mining hard examples
         # 2.1. Get the target label based on matched indices
-        target_label, _ = self._label_target_assign(gt_label, matched_indices)
+        target_label, _ = self._label_target_assign(
+            gt_label, matched_indices, mismatch_value=num_classes)
         confidence = _reshape_to_2d(scores)
         # 2.2. Compute confidence loss.
         # Reshape confidence to 2D tensor.
@@ -173,7 +175,10 @@ class SSDLoss(nn.Layer):
             encoded_bbox, matched_indices)
         # 4.3. Assign classification targets
         target_label, target_conf_weight = self._label_target_assign(
-            gt_label, matched_indices, neg_mask=neg_mask)
+            gt_label,
+            matched_indices,
+            neg_mask=neg_mask,
+            mismatch_value=num_classes)
 
         # 5. Compute loss.
         # 5.1 Compute confidence loss.
-- 
GitLab