s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection, in sync with the upstream project)
Commit 90bfe009 (unverified)

add hrnet mdoeling; (#2889)

rename all Hrnet to HRNet

Authored by zhiboniu on May 07, 2021; committed via GitHub on May 07, 2021
Parent: 01be6a1b

Showing 6 changed files with 395 additions and 21 deletions (+395, -21)
Changed files:
ppdet/modeling/architectures/__init__.py           +2    -0
ppdet/modeling/architectures/keypoint_hrhrnet.py   +11   -11
ppdet/modeling/architectures/keypoint_hrnet.py     +191  -0
ppdet/modeling/heads/keypoint_hrhrnet_head.py      +6    -6
ppdet/modeling/keypoint_utils.py                   +142  -0
ppdet/modeling/losses/keypoint_loss.py             +43   -4
ppdet/modeling/architectures/__init__.py  (+2 -0)
@@ -16,6 +16,7 @@ from . import solov2
 from . import ttfnet
 from . import s2anet
 from . import keypoint_hrhrnet
+from . import keypoint_hrnet
 
 from .meta_arch import *
 from .faster_rcnn import *
@@ -28,3 +29,4 @@ from .solov2 import *
 from .ttfnet import *
 from .s2anet import *
 from .keypoint_hrhrnet import *
+from .keypoint_hrnet import *
ppdet/modeling/architectures/keypoint_hrhrnet.py  (+11 -11)
@@ -26,33 +26,33 @@ from .meta_arch import BaseArch
 from .. import layers as L
 from ..keypoint_utils import transpred
 
-__all__ = ['HigherHrnet']
+__all__ = ['HigherHRNet']
 
 
 @register
-class HigherHrnet(BaseArch):
+class HigherHRNet(BaseArch):
     __category__ = 'architecture'
 
     def __init__(self,
-                 backbone='Hrnet',
-                 hrhrnet_head='HigherHrnetHead',
-                 post_process='HrHrnetPostProcess',
+                 backbone='HRNet',
+                 hrhrnet_head='HigherHRNetHead',
+                 post_process='HrHRNetPostProcess',
                  eval_flip=True,
                  flip_perm=None,
                  max_num_people=30):
         """
-        HigherHrnet network, see https://arxiv.org/abs/1908.10357;
-        HigherHrnet+swahr, see https://arxiv.org/abs/2012.15175
+        HigherHRNet network, see https://arxiv.org/abs/1908.10357;
+        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175
 
         Args:
             backbone (nn.Layer): backbone instance
             hrhrnet_head (nn.Layer): keypoint_head instance
             bbox_post_process (object): `BBoxPostProcess` instance
         """
-        super(HigherHrnet, self).__init__()
+        super(HigherHRNet, self).__init__()
         self.backbone = backbone
         self.hrhrnet_head = hrhrnet_head
-        self.post_process = HrHrnetPostProcess()
+        self.post_process = HrHRNetPostProcess()
         self.flip = eval_flip
         self.flip_perm = paddle.to_tensor(flip_perm)
         self.deploy = False
@@ -140,9 +140,9 @@ class HigherHrnet(BaseArch):
 
 @register
 @serializable
-class HrHrnetPostProcess(object):
+class HrHRNetPostProcess(object):
     '''
-    HrHrnet postprocess contain:
+    HrHRNet postprocess contain:
     1) get topk keypoints in the output heatmap
     2) sample the tagmap's value corresponding to each of the topk coordinate
     3) match different joints to combine to some people with Hungary algorithm
ppdet/modeling/architectures/keypoint_hrnet.py  (new file, mode 100644, +191 -0)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import numpy as np
import math
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..keypoint_utils import transform_preds
from .. import layers as L

__all__ = ['TopDownHRNet']


@register
class TopDownHRNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=False,
                 shift_heatmap=True):
        """
        HRNnet network, see https://arxiv.org/abs/1902.09212

        Args:
            backbone (nn.Layer): backbone instance
            post_process (object): `HRNetPostProcess` instance
            flip_perm (list): The left-right joints exchange order list
        """
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess()
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        return {'backbone': backbone, }

    def _forward(self):
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])

        if self.training:
            return self.loss(hrnet_outputs, self.inputs)
        else:
            if self.flip:
                self.inputs['image'] = self.inputs['image'].flip([3])
                feats = backbone(inputs)
                output_flipped = self.final_conv(feats)
                output_flipped = self.flip_back(output_flipped.numpy(),
                                                flip_perm)
                output_flipped = paddle.to_tensor(output_flipped.copy())
                if self.shift_heatmap:
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                output = (output + output_flipped) * 0.5
            preds, maxvals = self.post_process(hrnet_outputs, self.inputs)
            return preds, maxvals

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        preds, maxvals = self._forward()
        output = {'kpt_coord': preds, 'kpt_score': maxvals}
        return output


class HRNetPostProcess(object):
    def flip_back(self, output_flipped, matched_parts):
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        output_flipped = output_flipped[:, :, :, ::-1]

        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp

        return output_flipped

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)

        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))

        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)

        preds *= pred_mask

        return preds, maxvals

    def get_final_preds(self, heatmaps, center, scale):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)

        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]

        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25

        preds = coords.copy()

        # Transform back
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])

        return preds, maxvals

    def __call__(self, output, inputs):
        preds, maxvals = self.get_final_preds(
            output.numpy(), inputs['center'].numpy(), inputs['scale'].numpy())
        return preds, maxvals
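The decoding path of the new architecture lives in HRNetPostProcess: get_max_preds takes the per-joint argmax of the heatmaps, get_final_preds nudges each peak a quarter pixel toward the stronger neighbouring response, and transform_preds maps the result back to image space. (Note that the eval-time flip branch in _forward refers to backbone, inputs and flip_perm without the self. prefix and reads output before it is assigned, so that path would fail at runtime if flip were enabled as committed.) The snippet below is a minimal NumPy sketch of just the argmax-plus-quarter-offset step on a synthetic heatmap; decode_heatmap and the toy shapes are illustrative, not part of the committed API, and the affine transform back to the input image is omitted.

import numpy as np

def decode_heatmap(heatmaps):
    """Toy re-implementation of the argmax + quarter-offset decoding step
    (illustration only, coordinates stay in heatmap space)."""
    batch, joints, height, width = heatmaps.shape
    flat = heatmaps.reshape((batch, joints, -1))
    idx = np.argmax(flat, 2)
    maxvals = np.amax(flat, 2)
    coords = np.stack([idx % width, idx // width], axis=-1).astype(np.float32)
    for n in range(batch):
        for p in range(joints):
            hm = heatmaps[n, p]
            px, py = int(coords[n, p, 0] + 0.5), int(coords[n, p, 1] + 0.5)
            if 1 < px < width - 1 and 1 < py < height - 1:
                # shift a quarter pixel toward the second-highest neighbour
                diff = np.array([hm[py, px + 1] - hm[py, px - 1],
                                 hm[py + 1, px] - hm[py - 1, px]])
                coords[n, p] += np.sign(diff) * 0.25
    return coords, maxvals

# 1 image, 1 joint, 64x48 heatmap with a peak at (x=20, y=30)
hm = np.zeros((1, 1, 64, 48), dtype=np.float32)
hm[0, 0, 30, 20] = 1.0
hm[0, 0, 30, 21] = 0.5          # slightly stronger response to the right
coords, scores = decode_heatmap(hm)
print(coords)                    # ~[[[20.25, 30.]]]: x nudged by +0.25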
ppdet/modeling/heads/keypoint_hrhrnet_head.py  (+6 -6)
@@ -21,20 +21,20 @@ from ..backbones.hrnet import BasicBlock
 
 @register
-class HrHrnetHead(nn.Layer):
+class HrHRNetHead(nn.Layer):
     __inject__ = ['loss']
 
-    def __init__(self, num_joints, loss='HrHrnetLoss', swahr=False, width=32):
+    def __init__(self, num_joints, loss='HrHRNetLoss', swahr=False, width=32):
         """
-        Head for HigherHrnet network
+        Head for HigherHRNet network
 
         Args:
             num_joints (int): number of keypoints
-            hrloss (object): HrHrnetLoss instance
+            hrloss (object): HrHRNetLoss instance
             swahr (bool): whether to use swahr
             width (int): hrnet channel width
         """
-        super(HrHrnetHead, self).__init__()
+        super(HrHRNetHead, self).__init__()
         self.loss = loss
         self.num_joints = num_joints
@@ -53,7 +53,7 @@ class HrHrnetHead(nn.Layer):
                 num_filters=width,
                 has_se=False,
                 freeze_norm=False,
-                name='HrHrnetHead_{}'.format(i)) for i in range(4)))
+                name='HrHRNetHead_{}'.format(i)) for i in range(4)))
         self.interpolate = L.Upsample(2, mode='bilinear')
         self.concat = L.Concat(dim=1)
ppdet/modeling/keypoint_utils.py  (+142 -0; all lines after the hunk context are new)
@@ -158,3 +158,145 @@ def warp_affine_joints(joints, mat):
    return np.dot(np.concatenate(
        (joints, joints[:, 0:1] * 0 + 1), axis=1),
                  mat.T).reshape(shape)


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
    if not isinstance(sigmas, np.ndarray):
        sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0
    vars = (sigmas * 2)**2
    xg = g[0::3]
    yg = g[1::3]
    vg = g[2::3]
    ious = np.zeros((d.shape[0]))
    for n_d in range(0, d.shape[0]):
        xd = d[n_d, 0::3]
        yd = d[n_d, 1::3]
        vd = d[n_d, 2::3]
        dx = xd - xg
        dy = yd - yg
        e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
        if in_vis_thre is not None:
            ind = list(vg > in_vis_thre) and list(vd > in_vis_thre)
            e = e[ind]
        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
    return ious


def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """greedily select boxes with high confidence and overlap with current maximum <= thresh
    rule out overlap >= thresh

    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold to select the boxes
        sigmas (np.array): The variance to calculate the oks iou
            Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
            Default: None

    Return:
        keep (list): indexes to keep
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        inds = np.where(oks_ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def rescore(overlap, scores, thresh, type='gaussian'):
    assert overlap.shape[0] == scores.shape[0]
    if type == 'linear':
        inds = np.where(overlap >= thresh)[0]
        scores[inds] = scores[inds] * (1 - overlap[inds])
    else:
        scores = scores * np.exp(-overlap**2 / thresh)

    return scores


def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """greedily select boxes with high confidence and overlap with current maximum <= thresh
    rule out overlap >= thresh

    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold to select the boxes
        sigmas (np.array): The variance to calculate the oks iou
            Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
            Default: None

    Return:
        keep (list): indexes to keep
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    order = scores.argsort()[::-1]
    scores = scores[order]

    # max_dets = order.size
    max_dets = 20
    keep = np.zeros(max_dets, dtype=np.intp)
    keep_cnt = 0
    while order.size > 0 and keep_cnt < max_dets:
        i = order[0]

        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        order = order[1:]
        scores = rescore(oks_ovr, scores[1:], thresh)

        tmp = scores.argsort()[::-1]
        order = order[tmp]
        scores = scores[tmp]

        keep[keep_cnt] = i
        keep_cnt += 1

    keep = keep[:keep_cnt]

    return keep
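The appended OKS helpers implement COCO-style keypoint similarity: oks_iou turns squared joint distances, normalised by per-joint sigmas and object area, into a similarity in [0, 1], and oks_nms / soft_oks_nms greedily suppress or down-weight near-duplicate person detections. The toy example below runs oks_nms on two nearly identical 17-keypoint detections plus one far-away detection; the coordinates, scores, areas and the 0.5 threshold are made up for illustration, and the import assumes a PaddleDetection checkout that contains this commit.

import numpy as np
from ppdet.modeling.keypoint_utils import oks_nms

def fake_det(x0, y0, score):
    # 17 COCO-style keypoints laid out on a small grid, all marked visible (v=2)
    kpts = np.array([[x0 + (i % 4) * 5.0, y0 + (i // 4) * 5.0, 2.0]
                     for i in range(17)])
    return {'keypoints': kpts, 'score': score, 'area': 400.0}

kpts_db = [
    fake_det(10, 10, 0.9),    # best detection
    fake_det(11, 10, 0.8),    # near-duplicate, shifted by one pixel
    fake_det(200, 200, 0.7),  # a different person far away
]
print(oks_nms(kpts_db, thresh=0.5))   # expect [0, 2]: the near-duplicate is suppressed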
ppdet/modeling/losses/keypoint_loss.py  (+43 -4)
@@ -23,20 +23,59 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register, serializable
 
-__all__ = ['HrHrnetLoss']
+__all__ = ['HrHRNetLoss', 'KeyPointMSELoss']
 
 
 @register
 @serializable
-class HrHrnetLoss(nn.Layer):
+class KeyPointMSELoss(nn.Layer):
+    def __init__(self, use_target_weight=True):
+        """
+        KeyPointMSELoss layer
+
+        Args:
+            use_target_weight (bool): whether to use target weight
+        """
+        super(KeyPointMSELoss, self).__init__()
+        self.criterion = nn.MSELoss(reduction='mean')
+        self.use_target_weight = use_target_weight
+
+    def forward(self, output, records):
+        target = records['target']
+        target_weight = records['target_weight']
+        batch_size = output.shape[0]
+        num_joints = output.shape[1]
+        heatmaps_pred = output.reshape(
+            (batch_size, num_joints, -1)).split(num_joints, 1)
+        heatmaps_gt = target.reshape(
+            (batch_size, num_joints, -1)).split(num_joints, 1)
+        loss = 0
+        for idx in range(num_joints):
+            heatmap_pred = heatmaps_pred[idx].squeeze()
+            heatmap_gt = heatmaps_gt[idx].squeeze()
+            if self.use_target_weight:
+                loss += 0.5 * self.criterion(
+                    heatmap_pred.multiply(target_weight[:, idx]),
+                    heatmap_gt.multiply(target_weight[:, idx]))
+            else:
+                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
+        keypoint_losses = dict()
+        keypoint_losses['loss'] = loss / num_joints
+        return keypoint_losses
+
+
+@register
+@serializable
+class HrHRNetLoss(nn.Layer):
     def __init__(self, num_joints, swahr):
         """
-        HrHrnetLoss layer
+        HrHRNetLoss layer
         Args:
             num_joints (int): number of keypoints
         """
-        super(HrHrnetLoss, self).__init__()
+        super(HrHRNetLoss, self).__init__()
         if swahr:
             self.heatmaploss = HeatMapSWAHRLoss(num_joints)
         else:
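KeyPointMSELoss splits the predicted and target heatmaps per joint, applies an MSE weighted by the optional per-joint target_weight, and returns the average in a {'loss': ...} dict. The sketch below feeds it random tensors as a smoke test; the batch size, the 17-joint 64x48 heatmap shape and the [N, K, 1] target_weight layout are assumptions for illustration, and the import assumes a PaddleDetection checkout containing this commit.

import paddle
from ppdet.modeling.losses.keypoint_loss import KeyPointMSELoss

loss_fn = KeyPointMSELoss(use_target_weight=True)
output = paddle.rand([2, 17, 64, 48])           # predicted heatmaps
records = {
    'target': paddle.rand([2, 17, 64, 48]),     # ground-truth heatmaps
    'target_weight': paddle.ones([2, 17, 1]),   # per-joint weights (assumed layout)
}
print(loss_fn(output, records)['loss'])         # scalar: 0.5 x per-joint MSE, averaged over joints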