未验证 提交 90bfe009 编写于 作者: Z zhiboniu 提交者: GitHub

add hrnet modeling; (#2889)

rename all Hrnet to HRNet
上级 01be6a1b
......@@ -16,6 +16,7 @@ from . import solov2
from . import ttfnet
from . import s2anet
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from .meta_arch import *
from .faster_rcnn import *
......@@ -28,3 +29,4 @@ from .solov2 import *
from .ttfnet import *
from .s2anet import *
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
......@@ -26,33 +26,33 @@ from .meta_arch import BaseArch
from .. import layers as L
from ..keypoint_utils import transpred
__all__ = ['HigherHrnet']
__all__ = ['HigherHRNet']
@register
class HigherHrnet(BaseArch):
class HigherHRNet(BaseArch):
__category__ = 'architecture'
def __init__(self,
backbone='Hrnet',
hrhrnet_head='HigherHrnetHead',
post_process='HrHrnetPostProcess',
backbone='HRNet',
hrhrnet_head='HigherHRNetHead',
post_process='HrHRNetPostProcess',
eval_flip=True,
flip_perm=None,
max_num_people=30):
"""
HigherHrnet network, see https://arxiv.org/abs/1908.10357;
HigherHrnet+swahr, see https://arxiv.org/abs/2012.15175
HigherHRNet network, see https://arxiv.org/abs/1908.10357;
HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175
Args:
backbone (nn.Layer): backbone instance
hrhrnet_head (nn.Layer): keypoint_head instance
bbox_post_process (object): `BBoxPostProcess` instance
"""
super(HigherHrnet, self).__init__()
super(HigherHRNet, self).__init__()
self.backbone = backbone
self.hrhrnet_head = hrhrnet_head
self.post_process = HrHrnetPostProcess()
self.post_process = HrHRNetPostProcess()
self.flip = eval_flip
self.flip_perm = paddle.to_tensor(flip_perm)
self.deploy = False
......@@ -140,9 +140,9 @@ class HigherHrnet(BaseArch):
@register
@serializable
class HrHrnetPostProcess(object):
class HrHRNetPostProcess(object):
'''
HrHrnet postprocess contain:
HrHRNet postprocess contain:
1) get topk keypoints in the output heatmap
2) sample the tagmap's value corresponding to each of the topk coordinate
3) match different joints to combine to some people with Hungary algorithm
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import numpy as np
import math
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..keypoint_utils import transform_preds
from .. import layers as L
__all__ = ['TopDownHRNet']
@register
class TopDownHRNet(BaseArch):
    """Top-down keypoint estimation network based on HRNet.

    See https://arxiv.org/abs/1902.09212

    Args:
        width (int): channel number of the backbone output, i.e. the input
            channels of the final 1x1 conv
        num_joints (int): number of keypoints to predict
        backbone (nn.Layer): backbone instance
        loss (nn.Layer): keypoint loss instance
        post_process (object): `HRNetPostProcess` instance
        flip_perm (list): the left-right joints exchange order list
        flip (bool): whether to average predictions with the horizontally
            flipped image at eval time
        shift_heatmap (bool): whether to shift the flipped heatmap one
            pixel right before averaging
    """
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=False,
                 shift_heatmap=True):
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess()
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        return {'backbone': backbone, }

    def _forward(self):
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])

        if self.training:
            return self.loss(hrnet_outputs, self.inputs)
        else:
            if self.flip:
                # Run the backbone again on the horizontally flipped image
                # and average the restored heatmaps for a better estimate.
                self.inputs['image'] = self.inputs['image'].flip([3])
                # bug fix: was `backbone(inputs)` (undefined names)
                feats = self.backbone(self.inputs)
                # bug fix: was `self.final_conv(feats)` — feats is a list
                output_flipped = self.final_conv(feats[0])
                # bug fix: flip_back lives on the post-process object, and
                # flip_perm was an undefined local (should be self.flip_perm)
                output_flipped = self.post_process.flip_back(
                    output_flipped.numpy(), self.flip_perm)
                output_flipped = paddle.to_tensor(output_flipped.copy())
                if self.shift_heatmap:
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                # bug fix: was `output = (output + output_flipped) * 0.5`
                # with `output` undefined; average into hrnet_outputs
                hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
            preds, maxvals = self.post_process(hrnet_outputs, self.inputs)
            return preds, maxvals

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        preds, maxvals = self._forward()
        output = {'kpt_coord': preds, 'kpt_score': maxvals}
        return output
class HRNetPostProcess(object):
    """Decode top-down HRNet heatmaps into image-space keypoint
    coordinates and their confidences."""

    def flip_back(self, output_flipped, matched_parts):
        """Undo a horizontal flip of predicted heatmaps.

        Mirrors the width axis, then swaps every left/right joint channel
        pair listed in ``matched_parts``.
        """
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        # mirror along the width axis (view, mutated in place below)
        restored = output_flipped[:, :, :, ::-1]

        # exchange the channels of each left/right joint pair
        for pair in matched_parts:
            left, right = pair[0], pair[1]
            swap = restored[:, left, :, :].copy()
            restored[:, left, :, :] = restored[:, right, :, :]
            restored[:, right, :, :] = swap

        return restored

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch, joints, _, width = heatmaps.shape
        flat = heatmaps.reshape((batch, joints, -1))

        # flat index and value of each joint's peak response
        idx = np.argmax(flat, 2).reshape((batch, joints, 1))
        maxvals = np.amax(flat, 2).reshape((batch, joints, 1))

        # convert the flat index into (x, y) heatmap coordinates
        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

        # zero out joints whose peak confidence is not positive
        mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)).astype(np.float32)
        preds *= mask
        return preds, maxvals

    def get_final_preds(self, heatmaps, center, scale):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)

        height = heatmaps.shape[2]
        width = heatmaps.shape[3]

        # quarter-pixel refinement toward the larger neighbor response
        for b in range(coords.shape[0]):
            for j in range(coords.shape[1]):
                hm = heatmaps[b][j]
                px = int(math.floor(coords[b][j][0] + 0.5))
                py = int(math.floor(coords[b][j][1] + 0.5))
                if 1 < px < width - 1 and 1 < py < height - 1:
                    grad = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[b][j] += np.sign(grad) * .25

        preds = coords.copy()
        # map heatmap coordinates back into the original image space
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [width, height])
        return preds, maxvals

    def __call__(self, output, inputs):
        heat = output.numpy()
        preds, maxvals = self.get_final_preds(heat, inputs['center'].numpy(),
                                              inputs['scale'].numpy())
        return preds, maxvals
......@@ -21,20 +21,20 @@ from ..backbones.hrnet import BasicBlock
@register
class HrHrnetHead(nn.Layer):
class HrHRNetHead(nn.Layer):
__inject__ = ['loss']
def __init__(self, num_joints, loss='HrHrnetLoss', swahr=False, width=32):
def __init__(self, num_joints, loss='HrHRNetLoss', swahr=False, width=32):
"""
Head for HigherHrnet network
Head for HigherHRNet network
Args:
num_joints (int): number of keypoints
hrloss (object): HrHrnetLoss instance
hrloss (object): HrHRNetLoss instance
swahr (bool): whether to use swahr
width (int): hrnet channel width
"""
super(HrHrnetHead, self).__init__()
super(HrHRNetHead, self).__init__()
self.loss = loss
self.num_joints = num_joints
......@@ -53,7 +53,7 @@ class HrHrnetHead(nn.Layer):
num_filters=width,
has_se=False,
freeze_norm=False,
name='HrHrnetHead_{}'.format(i)) for i in range(4)))
name='HrHRNetHead_{}'.format(i)) for i in range(4)))
self.interpolate = L.Upsample(2, mode='bilinear')
self.concat = L.Concat(dim=1)
......
......@@ -158,3 +158,145 @@ def warp_affine_joints(joints, mat):
return np.dot(np.concatenate(
(joints, joints[:, 0:1] * 0 + 1), axis=1),
mat.T).reshape(shape)
def affine_transform(pt, t):
    """Apply the 2x3 affine matrix ``t`` to a single 2-D point ``pt``."""
    homogeneous = np.array([pt[0], pt[1], 1.])
    return np.dot(t, homogeneous)[:2]
def transform_preds(coords, center, scale, output_size):
    """Map heatmap-space keypoint coordinates back to the original image.

    Builds the inverse affine transform from the crop ``center``,
    ``scale`` (multiplied by 200, per the dataset convention used here)
    and the heatmap ``output_size``, then applies it to every point.
    """
    mapped = np.zeros(coords.shape)
    # inv=1 selects the heatmap -> original-image direction
    matrix = get_affine_transform(center, scale * 200, 0, output_size, inv=1)
    for j in range(coords.shape[0]):
        mapped[j, 0:2] = affine_transform(coords[j, 0:2], matrix)
    return mapped
def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
    """Object Keypoint Similarity (OKS) between one pose and n candidates.

    Args:
        g (np.ndarray): flattened keypoints [x0, y0, v0, x1, ...] of one pose
        d (np.ndarray): (n, 3*k) flattened keypoints of n candidate poses
        a_g (float): area of pose ``g``
        a_d (np.ndarray): areas of the n candidate poses
        sigmas (np.ndarray): per-keypoint falloff constants; defaults to
            the 17 COCO keypoint sigmas
        in_vis_thre (float): if set, only keypoints visible (v > thre) in
            BOTH poses contribute to the similarity

    Returns:
        np.ndarray: OKS value for each of the n candidates
    """
    if not isinstance(sigmas, np.ndarray):
        sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0
    variances = (sigmas * 2)**2  # renamed from `vars` (shadowed builtin)
    xg = g[0::3]
    yg = g[1::3]
    vg = g[2::3]
    ious = np.zeros((d.shape[0]))
    for n_d in range(0, d.shape[0]):
        xd = d[n_d, 0::3]
        yd = d[n_d, 1::3]
        vd = d[n_d, 2::3]
        dx = xd - xg
        dy = yd - yg
        e = (dx**2 + dy**2) / variances / (
            (a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
        if in_vis_thre is not None:
            # bug fix: `list(vg > thre) and list(vd > thre)` evaluates to
            # the second list whenever the first is non-empty, silently
            # ignoring g's visibility; require visibility in both poses
            ind = np.logical_and(vg > in_vis_thre, vd > in_vis_thre)
            e = e[ind]
        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
    return ious
def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """greedily select boxes with high confidence and overlap with current maximum <= thresh
    rule out overlap >= thresh

    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold to select the boxes
        sigmas (np.array): The variance to calculate the oks iou
                Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
                Default: None

    Return:
        keep (list): indexes to keep
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([entry['score'] for entry in kpts_db])
    kpts = np.array([entry['keypoints'].flatten() for entry in kpts_db])
    areas = np.array([entry['area'] for entry in kpts_db])

    # candidates sorted by descending score
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(best)
        # OKS of the current best against all remaining candidates
        overlaps = oks_iou(kpts[best], kpts[order[1:]], areas[best],
                           areas[order[1:]], sigmas, in_vis_thre)
        # keep only candidates sufficiently different from the kept pose
        survivors = np.where(overlaps <= thresh)[0]
        order = order[survivors + 1]
    return keep
def rescore(overlap, scores, thresh, type='gaussian'):
    """Decay candidate scores according to their overlap with a kept pose.

    'linear' decays (in place) only the scores whose overlap reaches
    ``thresh``; any other value applies a gaussian decay to all scores.
    """
    assert overlap.shape[0] == scores.shape[0]
    if type != 'linear':
        # gaussian decay over every candidate
        return scores * np.exp(-overlap**2 / thresh)
    hit = np.where(overlap >= thresh)[0]
    scores[hit] = scores[hit] * (1 - overlap[hit])
    return scores
def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None, max_dets=20):
    """greedily rescore (rather than discard) poses overlapping the current maximum

    Args:
        kpts_db (list): The predicted keypoints within the image
        thresh (float): The threshold used to decay overlapping scores
        sigmas (np.array): The variance to calculate the oks iou
                Default: None
        in_vis_thre (float): The threshold to select the high confidence boxes
                Default: None
        max_dets (int): maximum number of detections to keep
                Default: 20 (was previously hard-coded)

    Return:
        keep (np.ndarray): indexes to keep
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    order = scores.argsort()[::-1]
    scores = scores[order]

    keep = np.zeros(max_dets, dtype=np.intp)
    keep_cnt = 0
    while order.size > 0 and keep_cnt < max_dets:
        i = order[0]

        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        # soft-NMS: decay the scores of the remaining candidates
        # instead of discarding them outright
        order = order[1:]
        scores = rescore(oks_ovr, scores[1:], thresh)

        # re-sort the remainder by their decayed scores
        tmp = scores.argsort()[::-1]
        order = order[tmp]
        scores = scores[tmp]

        keep[keep_cnt] = i
        keep_cnt += 1
    keep = keep[:keep_cnt]

    return keep
......@@ -23,20 +23,59 @@ import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
__all__ = ['HrHrnetLoss']
__all__ = ['HrHRNetLoss', 'KeyPointMSELoss']
@register
@serializable
class HrHrnetLoss(nn.Layer):
class KeyPointMSELoss(nn.Layer):
    """Mean-squared-error loss over per-joint heatmaps for top-down
    keypoint estimation."""
    def __init__(self, use_target_weight=True):
        """
        KeyPointMSELoss layer

        Args:
            use_target_weight (bool): whether to use target weight
        """
        super(KeyPointMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight
    def forward(self, output, records):
        """Compute the per-joint MSE between predicted and target heatmaps.

        Args:
            output: predicted heatmaps — presumably shape
                [batch, num_joints, H, W]; TODO confirm against the head
            records (dict): must contain 'target' heatmaps and
                'target_weight' per-joint weights

        Returns:
            dict: {'loss': scalar loss averaged over joints}
        """
        target = records['target']
        target_weight = records['target_weight']
        batch_size = output.shape[0]
        num_joints = output.shape[1]
        # split into one flattened heatmap per joint
        heatmaps_pred = output.reshape(
            (batch_size, num_joints, -1)).split(num_joints, 1)
        heatmaps_gt = target.reshape(
            (batch_size, num_joints, -1)).split(num_joints, 1)
        loss = 0
        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()
            if self.use_target_weight:
                # weight each joint's contribution by its target weight
                # (presumably zero for invisible joints — verify in loader)
                loss += 0.5 * self.criterion(
                    heatmap_pred.multiply(target_weight[:, idx]),
                    heatmap_gt.multiply(target_weight[:, idx]))
            else:
                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
        keypoint_losses = dict()
        keypoint_losses['loss'] = loss / num_joints
        return keypoint_losses
@register
@serializable
class HrHRNetLoss(nn.Layer):
def __init__(self, num_joints, swahr):
"""
HrHrnetLoss layer
HrHRNetLoss layer
Args:
num_joints (int): number of keypoints
"""
super(HrHrnetLoss, self).__init__()
super(HrHRNetLoss, self).__init__()
if swahr:
self.heatmaploss = HeatMapSWAHRLoss(num_joints)
else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册