# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import json
from collections import defaultdict, OrderedDict

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from scipy.io import loadmat, savemat

from ..modeling.keypoint_utils import oks_nms

__all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']


class KeyPointTopDownCOCOEval(object):
    """COCO keypoint evaluation (top-down): OKS rescoring, NMS and mAP.

    Adapted from
    https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
    Copyright (c) Microsoft, under the MIT License.
    """

    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 iou_type='keypoints',
                 in_vis_thre=0.2,
                 oks_thre=0.9):
        super(KeyPointTopDownCOCOEval, self).__init__()
        self.coco = COCO(anno_file)
        self.num_samples = num_samples
        self.num_joints = num_joints
        self.iou_type = iou_type
        self.in_vis_thre = in_vis_thre
        self.oks_thre = oks_thre
        self.output_eval = output_eval
        self.res_file = os.path.join(output_eval, "keypoints_results.json")
        self.reset()

    def reset(self):
        self.results = {
            'all_preds': np.zeros(
                (self.num_samples, self.num_joints, 3), dtype=np.float32),
            'all_boxes': np.zeros((self.num_samples, 6)),
            'image_path': []
        }
        self.eval_results = {}
        self.idx = 0

    def update(self, inputs, outputs):
        kpts, _ = outputs['keypoint'][0]

        num_images = inputs['image'].shape[0]
        self.results['all_preds'][self.idx:self.idx + num_images, :, 0:3] = \
            kpts[:, :, 0:3]
        # boxes layout: center (2), scale (2), area, score;
        # scale is in units of 200 px, so the product below is the pixel area.
        self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = \
            inputs['center'].numpy()[:, 0:2]
        self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = \
            inputs['scale'].numpy()[:, 0:2]
        self.results['all_boxes'][self.idx:self.idx + num_images, 4] = \
            np.prod(inputs['scale'].numpy() * 200, 1)
        self.results['all_boxes'][self.idx:self.idx + num_images, 5] = \
            np.squeeze(inputs['score'].numpy())
        self.results['image_path'].extend(inputs['im_id'].numpy())

        self.idx += num_images

    def _write_coco_keypoint_results(self, keypoints):
        data_pack = [{
            'cat_id': 1,
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        if not os.path.exists(self.output_eval):
            os.makedirs(self.output_eval)
        with open(self.res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        # If the dump was truncated, close the top-level JSON list so the
        # file stays loadable by pycocotools.
        try:
            with open(self.res_file) as f:
                json.load(f)
        except Exception:
            with open(self.res_file, 'r') as f:
                content = f.readlines()
            content[-1] = ']'
            with open(self.res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            _key_points = _key_points.reshape(_key_points.shape[0], -1)

            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': _key_points[k].tolist(),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)

        return cat_results

    def get_final_results(self, preds, all_boxes, img_path):
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx])
            })
        # group detections: image -> person -> keypoints
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and OKS NMS
        num_joints = preds.shape[1]
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                # average the confidences of sufficiently visible joints
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: combine box score with keypoint confidence
                n_p['score'] = kpt_score * box_score

            keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                           oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts)

    def accumulate(self):
        self.get_final_results(self.results['all_preds'],
                               self.results['all_boxes'],
                               self.results['image_path'])
        coco_dt = self.coco.loadRes(self.res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        keypoint_stats = []
        for ind in range(len(coco_eval.stats)):
            keypoint_stats.append(coco_eval.stats[ind])
        self.eval_results['keypoint'] = keypoint_stats

    def log(self):
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]
        num_values = len(stats_names)
        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
        print('|---' * num_values + '|')
        print(' '.join([
            '| {:.3f}'.format(value)
            for value in self.eval_results['keypoint']
        ]) + ' |')

    def get_results(self):
        return self.eval_results

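# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal driver loop for KeyPointTopDownCOCOEval. `model`, `eval_loader`
# and `eval_dataset` are hypothetical stand-ins for the surrounding
# PaddleDetection evaluation pipeline:
#
#   metric = KeyPointTopDownCOCOEval(
#       anno_file='annotations/person_keypoints_val2017.json',
#       num_samples=len(eval_dataset),
#       num_joints=17,
#       output_eval='./output_eval')
#   for inputs in eval_loader:
#       outputs = model(inputs)        # must provide outputs['keypoint']
#       metric.update(inputs, outputs)
#   metric.accumulate()                # OKS NMS, then COCOeval
#   metric.log()                       # prints the AP/AR markdown table
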
class KeyPointTopDownMPIIEval(object):
    def __init__(self,
                 anno_file,
                 num_samples,
                 num_joints,
                 output_eval,
                 oks_thre=0.9):
        super(KeyPointTopDownMPIIEval, self).__init__()
        self.ann_file = anno_file
        self.reset()

    def reset(self):
        self.results = []
        self.eval_results = {}
        self.idx = 0

    def update(self, inputs, outputs):
        kpts, _ = outputs['keypoint'][0]

        num_images = inputs['image'].shape[0]
        results = {}
        results['preds'] = kpts[:, :, 0:3]
        results['boxes'] = np.zeros((num_images, 6))
        results['boxes'][:, 0:2] = inputs['center'].numpy()[:, 0:2]
        results['boxes'][:, 2:4] = inputs['scale'].numpy()[:, 0:2]
        results['boxes'][:, 4] = np.prod(inputs['scale'].numpy() * 200, 1)
        results['boxes'][:, 5] = np.squeeze(inputs['score'].numpy())
        results['image_path'] = inputs['image_file']

        self.results.append(results)

    def accumulate(self):
        self.eval_results = self.evaluate(self.results)

    def log(self):
        for item, value in self.eval_results.items():
            print("{} : {}".format(item, value))

    def get_results(self):
        return self.eval_results

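    # PCKh background for `evaluate` below: a predicted joint counts as
    # correct at threshold t when its L2 distance to the ground truth is at
    # most t * SC_BIAS * (head box size). The ground truth is read from a
    # MATLAB file 'mpii_gt_val.mat' that must sit in the same directory as
    # `anno_file`; a toy numeric check of this rule is sketched at the
    # bottom of the module.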
    def evaluate(self, outputs, savepath=None):
        """Evaluate PCKh for the MPII dataset. Adapted from
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.

        Args:
            outputs (list(preds, boxes)):
                * preds (np.ndarray[N,K,3]): The first two dimensions are
                  coordinates; the score is the third dimension of the array.
                * boxes (np.ndarray[N,6]): [center[0], center[1], scale[0],
                  scale[1], area, score]

        Returns:
            dict: PCKh for each joint
        """
        kpts = []
        for output in outputs:
            preds = output['preds']
            batch_size = preds.shape[0]
            for i in range(batch_size):
                kpts.append({'keypoints': preds[i]})

        preds = np.stack([kpt['keypoints'] for kpt in kpts])

        # convert 0-based index to 1-based index,
        # and get the first two dimensions.
        preds = preds[..., :2] + 1.0

        if savepath is not None:
            pred_file = os.path.join(savepath, 'pred.mat')
            savemat(pred_file, mdict={'preds': preds})

        SC_BIAS = 0.6
        threshold = 0.5

        gt_file = os.path.join(
            os.path.dirname(self.ann_file), 'mpii_gt_val.mat')
        gt_dict = loadmat(gt_file)
        dataset_joints = gt_dict['dataset_joints']
        jnt_missing = gt_dict['jnt_missing']
        pos_gt_src = gt_dict['pos_gt_src']
        headboxes_src = gt_dict['headboxes_src']

        pos_pred_src = np.transpose(preds, [1, 2, 0])

        head = np.where(dataset_joints == 'head')[1][0]
        lsho = np.where(dataset_joints == 'lsho')[1][0]
        lelb = np.where(dataset_joints == 'lelb')[1][0]
        lwri = np.where(dataset_joints == 'lwri')[1][0]
        lhip = np.where(dataset_joints == 'lhip')[1][0]
        lkne = np.where(dataset_joints == 'lkne')[1][0]
        lank = np.where(dataset_joints == 'lank')[1][0]

        rsho = np.where(dataset_joints == 'rsho')[1][0]
        relb = np.where(dataset_joints == 'relb')[1][0]
        rwri = np.where(dataset_joints == 'rwri')[1][0]
        rkne = np.where(dataset_joints == 'rkne')[1][0]
        rank = np.where(dataset_joints == 'rank')[1][0]
        rhip = np.where(dataset_joints == 'rhip')[1][0]

        jnt_visible = 1 - jnt_missing
        uv_error = pos_pred_src - pos_gt_src
        uv_err = np.linalg.norm(uv_error, axis=1)
        # head size = SC_BIAS * head box diagonal, the PCKh normalizer
        headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
        headsizes = np.linalg.norm(headsizes, axis=0)
        headsizes *= SC_BIAS
        scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
        scaled_uv_err = uv_err / scale
        scaled_uv_err = scaled_uv_err * jnt_visible
        jnt_count = np.sum(jnt_visible, axis=1)
        less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
        PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count

        # PCKh over the whole threshold range [0, 0.5] in steps of 0.01
        rng = np.arange(0, 0.5 + 0.01, 0.01)
        pckAll = np.zeros((len(rng), 16), dtype=np.float32)

        for r, threshold in enumerate(rng):
            less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
            pckAll[r, :] = 100. * np.sum(less_than_threshold,
                                         axis=1) / jnt_count

        # mask out the pelvis/thorax joints (indices 6 and 7)
        PCKh = np.ma.array(PCKh, mask=False)
        PCKh.mask[6:8] = True

        jnt_count = np.ma.array(jnt_count, mask=False)
        jnt_count.mask[6:8] = True
        jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)

        name_value = [  # noqa
            ('Head', PCKh[head]),
            ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
            ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
            ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
            ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
            ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
            ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
            ('PCKh', np.sum(PCKh * jnt_ratio)),
            # rng[10] corresponds to threshold 0.1
            ('PCKh@0.1', np.sum(pckAll[10, :] * jnt_ratio))
        ]
        name_value = OrderedDict(name_value)

        return name_value

    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
        """Sort kpts and remove entries with a duplicated `bbox_id`."""
        kpts = sorted(kpts, key=lambda x: x[key])
        num = len(kpts)
        for i in range(num - 1, 0, -1):
            if kpts[i][key] == kpts[i - 1][key]:
                del kpts[i]

        return kpts
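
if __name__ == '__main__':
    # PCKh sanity sketch (illustrative toy numbers; not part of the original
    # module): checks the thresholding rule used by
    # KeyPointTopDownMPIIEval.evaluate on two hand-made joints.
    gt = np.array([[10., 10.], [50., 50.]])    # two ground-truth joints
    pred = np.array([[12., 10.], [80., 50.]])  # 2 px and 30 px off
    head_size = 20.0
    scale = 0.6 * head_size                    # SC_BIAS * head size = 12
    err = np.linalg.norm(pred - gt, axis=1) / scale
    # only the first joint falls within 0.5 * head size -> PCKh@0.5 == 50.0
    print('PCKh@0.5 = {:.1f}'.format(100. * np.mean(err <= 0.5)))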