diff --git a/ppdet/data/source/keypoint_coco.py b/ppdet/data/source/keypoint_coco.py
index bff882f7a7f3dc2cb0b2de8ab322ee0ca20ba8ce..27379a0c3e9e6e2fa02a90b56e99e80bac4ff08d 100644
--- a/ppdet/data/source/keypoint_coco.py
+++ b/ppdet/data/source/keypoint_coco.py
@@ -15,8 +15,8 @@
 import os
 import cv2
 import numpy as np
+import json
 import copy
-# TODO: unify xtococotools and pycocotools
 import pycocotools
 from pycocotools.coco import COCO
 from .dataset import DetDataset
@@ -317,4 +317,341 @@ class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
         self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
         self.dataset_name = 'crowdpose'
 
-        print(f'=> num_images: {self.num_images}')
+        print('=> num_images: {}'.format(self.num_images))
+
+
+@serializable
+class KeypointTopDownBaseDataset(DetDataset):
+    """Base class for top-down datasets.
+
+    All datasets should subclass it.
+    All subclasses should overwrite:
+    Methods: `_get_db`
+
+    Args:
+        dataset_dir (str): Root path to the dataset.
+        image_dir (str): Path to a directory where images are held.
+        anno_path (str): Relative path to the annotation file.
+        num_joints (int): Number of keypoints.
+        transform (composed(operators)): A sequence of data transforms.
+    """
+
+    def __init__(self,
+                 dataset_dir,
+                 image_dir,
+                 anno_path,
+                 num_joints,
+                 transform=[]):
+        super().__init__(dataset_dir, image_dir, anno_path)
+        self.image_info = {}
+        self.ann_info = {}
+
+        self.img_prefix = os.path.join(dataset_dir, image_dir)
+        self.transform = transform
+
+        self.ann_info['num_joints'] = num_joints
+        self.db = []
+
+    def __len__(self):
+        """Get dataset length."""
+        return len(self.db)
+
+    def _get_db(self):
+        """Get a sample."""
+        raise NotImplementedError
+
+    def __getitem__(self, idx):
+        """Prepare sample for training given the index."""
+        records = copy.deepcopy(self.db[idx])
+        records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
+                                      cv2.IMREAD_IGNORE_ORIENTATION)
+        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
+        records['score'] = records['score'] if 'score' in records else 1
+        records = self.transform(records)
+        return records
+
+
+@register
+@serializable
+class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
+    """COCO dataset for top-down pose estimation.
+
+    The dataset loads raw features and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    COCO keypoint indexes::
+
+        0: 'nose',
+        1: 'left_eye',
+        2: 'right_eye',
+        3: 'left_ear',
+        4: 'right_ear',
+        5: 'left_shoulder',
+        6: 'right_shoulder',
+        7: 'left_elbow',
+        8: 'right_elbow',
+        9: 'left_wrist',
+        10: 'right_wrist',
+        11: 'left_hip',
+        12: 'right_hip',
+        13: 'left_knee',
+        14: 'right_knee',
+        15: 'left_ankle',
+        16: 'right_ankle'
+
+    Args:
+        dataset_dir (str): Root path to the dataset.
+        image_dir (str): Path to a directory where images are held.
+        anno_path (str): Relative path to the annotation file.
+        num_joints (int): Number of keypoints.
+        trainsize (list): [w, h] Image target size.
+        transform (composed(operators)): A sequence of data transforms.
+        bbox_file (str): Path to a detection bbox file.
+            Default: None.
+        use_gt_bbox (bool): Whether to use the ground truth bbox.
+            Default: True.
+        pixel_std (int): The pixel std used to normalize the scale.
+            Default: 200.
+        image_thre (float): The score threshold to filter detection boxes.
+            Default: 0.0.
+ """ + + def __init__(self, + dataset_dir, + image_dir, + anno_path, + num_joints, + trainsize, + transform=[], + bbox_file=None, + use_gt_bbox=True, + pixel_std=200, + image_thre=0.0): + super().__init__(dataset_dir, image_dir, anno_path, num_joints, + transform) + + self.bbox_file = bbox_file + self.use_gt_bbox = use_gt_bbox + self.trainsize = trainsize + self.pixel_std = pixel_std + self.image_thre = image_thre + self.dataset_name = 'coco' + + def parse_dataset(self): + if self.use_gt_bbox: + self.db = self._load_coco_keypoint_annotations() + else: + self.db = self._load_coco_person_detection_results() + + def _load_coco_keypoint_annotations(self): + coco = COCO(self.get_anno()) + img_ids = coco.getImgIds() + gt_db = [] + for index in img_ids: + im_ann = coco.loadImgs(index)[0] + width = im_ann['width'] + height = im_ann['height'] + file_name = im_ann['file_name'] + im_id = int(im_ann["id"]) + + annIds = coco.getAnnIds(imgIds=index, iscrowd=False) + objs = coco.loadAnns(annIds) + + valid_objs = [] + for obj in objs: + x, y, w, h = obj['bbox'] + x1 = np.max((0, x)) + y1 = np.max((0, y)) + x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) + y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) + if obj['area'] > 0 and x2 >= x1 and y2 >= y1: + obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] + valid_objs.append(obj) + objs = valid_objs + + rec = [] + for obj in objs: + if max(obj['keypoints']) == 0: + continue + + joints = np.zeros( + (self.ann_info['num_joints'], 3), dtype=np.float) + joints_vis = np.zeros( + (self.ann_info['num_joints'], 3), dtype=np.float) + for ipt in range(self.ann_info['num_joints']): + joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0] + joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1] + joints[ipt, 2] = 0 + t_vis = obj['keypoints'][ipt * 3 + 2] + if t_vis > 1: + t_vis = 1 + joints_vis[ipt, 0] = t_vis + joints_vis[ipt, 1] = t_vis + joints_vis[ipt, 2] = 0 + + center, scale = self._box2cs(obj['clean_bbox'][:4]) + rec.append({ + 'image_file': os.path.join(self.img_prefix, file_name), + 'center': center, + 'scale': scale, + 'joints': joints, + 'joints_vis': joints_vis, + 'im_id': im_id, + }) + gt_db.extend(rec) + + return gt_db + + def _box2cs(self, box): + x, y, w, h = box[:4] + center = np.zeros((2), dtype=np.float32) + center[0] = x + w * 0.5 + center[1] = y + h * 0.5 + aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1] + + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + scale = np.array( + [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], + dtype=np.float32) + if center[0] != -1: + scale = scale * 1.25 + + return center, scale + + def _load_coco_person_detection_results(self): + all_boxes = None + bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file) + with open(bbox_file_path, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + print('=> Load %s fail!' 
+    def _load_coco_person_detection_results(self):
+        all_boxes = None
+        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
+        with open(bbox_file_path, 'r') as f:
+            all_boxes = json.load(f)
+
+        if not all_boxes:
+            print('=> Load %s fail!' % bbox_file_path)
+            return None
+
+        kpt_db = []
+        for n_img in range(0, len(all_boxes)):
+            det_res = all_boxes[n_img]
+            if det_res['category_id'] != 1:
+                continue
+            file_name = det_res[
+                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
+                    'image_id']
+            img_name = os.path.join(self.img_prefix, file_name)
+            box = det_res['bbox']
+            score = det_res['score']
+            im_id = int(det_res['image_id'])
+
+            if score < self.image_thre:
+                continue
+
+            center, scale = self._box2cs(box)
+            joints = np.zeros(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
+            joints_vis = np.ones(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
+            kpt_db.append({
+                'image_file': img_name,
+                'im_id': im_id,
+                'center': center,
+                'scale': scale,
+                'score': score,
+                'joints': joints,
+                'joints_vis': joints_vis,
+            })
+
+        return kpt_db
+
+
+@register
+@serializable
+class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
+    """MPII dataset for top-down pose estimation.
+
+    The dataset loads raw features and applies the specified transforms
+    to return a dict containing the image tensors and other information.
+
+    MPII keypoint indexes::
+
+        0: 'right_ankle',
+        1: 'right_knee',
+        2: 'right_hip',
+        3: 'left_hip',
+        4: 'left_knee',
+        5: 'left_ankle',
+        6: 'pelvis',
+        7: 'thorax',
+        8: 'upper_neck',
+        9: 'head_top',
+        10: 'right_wrist',
+        11: 'right_elbow',
+        12: 'right_shoulder',
+        13: 'left_shoulder',
+        14: 'left_elbow',
+        15: 'left_wrist',
+
+    Args:
+        dataset_dir (str): Root path to the dataset.
+        image_dir (str): Path to a directory where images are held.
+        anno_path (str): Relative path to the annotation file.
+        num_joints (int): Number of keypoints.
+        transform (composed(operators)): A sequence of data transforms.
+    """
+
+    def __init__(self,
+                 dataset_dir,
+                 image_dir,
+                 anno_path,
+                 num_joints,
+                 transform=[]):
+        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
+                         transform)
+
+        self.dataset_name = 'mpii'
+
+    def parse_dataset(self):
+        with open(self.get_anno()) as anno_file:
+            anno = json.load(anno_file)
+
+        gt_db = []
+        for a in anno:
+            image_name = a['image']
+            im_id = a['image_id'] if 'image_id' in a else int(
+                os.path.splitext(image_name)[0])
+
+            c = np.array(a['center'], dtype=np.float32)
+            s = np.array([a['scale'], a['scale']], dtype=np.float32)
+
+            # Adjust center/scale slightly to avoid cropping limbs
+            if c[0] != -1:
+                c[1] = c[1] + 15 * s[1]
+                s = s * 1.25
+            # MPII annotations are 1-based; convert to 0-based
+            c = c - 1
+
+            joints = np.zeros(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
+            joints_vis = np.zeros(
+                (self.ann_info['num_joints'], 3), dtype=np.float32)
+            if 'joints' in a:
+                joints_ = np.array(a['joints'])
+                joints_[:, 0:2] = joints_[:, 0:2] - 1
+                joints_vis_ = np.array(a['joints_vis'])
+                assert len(joints_) == self.ann_info[
+                    'num_joints'], 'joint num diff: {} vs {}'.format(
+                        len(joints_), self.ann_info['num_joints'])
+
+                joints[:, 0:2] = joints_[:, 0:2]
+                joints_vis[:, 0] = joints_vis_[:]
+                joints_vis[:, 1] = joints_vis_[:]
+
+            gt_db.append({
+                'image_file': os.path.join(self.img_prefix, image_name),
+                'im_id': im_id,
+                'center': c,
+                'scale': s,
+                'joints': joints,
+                'joints_vis': joints_vis
+            })
+        self.db = gt_db
diff --git a/ppdet/data/transform/keypoint_operators.py b/ppdet/data/transform/keypoint_operators.py
index cdd4e7d3d359c4c73aa45df0ded1dcc40a74743b..b222af999429f37c8829102dadf03a93d298da98 100644
--- a/ppdet/data/transform/keypoint_operators.py
+++ b/ppdet/data/transform/keypoint_operators.py
@@ -29,7 +29,7 @@ import math
 import copy
 import os
 
-from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints
+from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform
 from ppdet.core.workspace import serializable
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@@ -38,7 +38,8 @@ registered_ops = []
 
 __all__ = [
     'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
-    'NormalizePermute', 'EvalAffine'
+    'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
+    'TopDownAffine', 'ToHeatmapsTopDown'
 ]
@@ -403,3 +404,229 @@ class ToHeatmaps(object):
         records['mask_{}x'.format(idx + 1)] = mask
         del records['mask']
         return records
+
+
+@register_keypointop
+class RandomFlipHalfBodyTransform(object):
+    """Apply flip, scale, rotation and half-body data augmentation
+    to the training image and its keypoint coords.
+
+    Args:
+        trainsize (list): [w, h], Image target size
+        upper_body_ids (list): The upper body joint ids
+        flip_pairs (list): The left-right joints exchange order list
+        pixel_std (int): The pixel std of the scale
+        scale (float): The scale factor to transform the image
+        rot (int): The rotation factor to transform the image
+        num_joints_half_body (int): The minimum number of visible joints
+            required before the half-body transform is applied
+        prob_half_body (float): The probability of applying the half-body
+            transform
+        flip (bool): Whether to flip the image
+        rot_prob (float): The probability of applying the rotation
+
+    Returns:
+        records (dict): the image and coords after the transforms
+
+    """
+
+    def __init__(self,
+                 trainsize,
+                 upper_body_ids,
+                 flip_pairs,
+                 pixel_std,
+                 scale=0.35,
+                 rot=40,
+                 num_joints_half_body=8,
+                 prob_half_body=0.3,
+                 flip=True,
+                 rot_prob=0.6):
+        super(RandomFlipHalfBodyTransform, self).__init__()
+        self.trainsize = trainsize
+        self.upper_body_ids = upper_body_ids
+        self.flip_pairs = flip_pairs
+        self.pixel_std = pixel_std
+        self.scale = scale
+        self.rot = rot
+        self.num_joints_half_body = num_joints_half_body
+        self.prob_half_body = prob_half_body
+        self.flip = flip
+        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
+        self.rot_prob = rot_prob
+
+    def halfbody_transform(self, joints, joints_vis):
+        upper_joints = []
+        lower_joints = []
+        for joint_id in range(joints.shape[0]):
+            if joints_vis[joint_id][0] > 0:
+                if joint_id in self.upper_body_ids:
+                    upper_joints.append(joints[joint_id])
+                else:
+                    lower_joints.append(joints[joint_id])
+        # randn() follows the reference HRNet implementation, so the upper
+        # body is preferred with probability ~0.69 rather than 0.5
+        if np.random.randn() < 0.5 and len(upper_joints) > 2:
+            selected_joints = upper_joints
+        else:
+            selected_joints = lower_joints if len(
+                lower_joints) > 2 else upper_joints
+        if len(selected_joints) < 2:
+            return None, None
+        selected_joints = np.array(selected_joints, dtype=np.float32)
+        center = selected_joints.mean(axis=0)[:2]
+        left_top = np.amin(selected_joints, axis=0)
+        right_bottom = np.amax(selected_joints, axis=0)
+        w = right_bottom[0] - left_top[0]
+        h = right_bottom[1] - left_top[1]
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array(
+            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
+            dtype=np.float32)
+        scale = scale * 1.5
+
+        return center, scale
+
+    def flip_joints(self, joints, joints_vis, width, matched_parts):
+        joints[:, 0] = width - joints[:, 0] - 1
+        for pair in matched_parts:
+            joints[pair[0], :], joints[pair[1], :] = \
+                joints[pair[1], :], joints[pair[0], :].copy()
+            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
+                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
+
+        return joints * joints_vis, joints_vis
+
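+    # For COCO's 17 keypoints, flip_pairs would typically be the left/right
+    # index pairs below (an illustrative value; the real list comes from the
+    # dataset config, not from this patch):
+    # [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
+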
+    def __call__(self, records):
+        image = records['image']
+        joints = records['joints']
+        joints_vis = records['joints_vis']
+        c = records['center']
+        s = records['scale']
+        r = 0
+        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
+                np.random.rand() < self.prob_half_body):
+            c_half_body, s_half_body = self.halfbody_transform(joints,
+                                                               joints_vis)
+            if c_half_body is not None and s_half_body is not None:
+                c, s = c_half_body, s_half_body
+        sf = self.scale
+        rf = self.rot
+        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+        r = np.clip(np.random.randn() * rf, -rf * 2,
+                    rf * 2) if np.random.random() <= self.rot_prob else 0
+
+        if self.flip and np.random.random() <= 0.5:
+            image = image[:, ::-1, :]
+            joints, joints_vis = self.flip_joints(
+                joints, joints_vis, image.shape[1], self.flip_pairs)
+            c[0] = image.shape[1] - c[0] - 1
+        records['image'] = image
+        records['joints'] = joints
+        records['joints_vis'] = joints_vis
+        records['center'] = c
+        records['scale'] = s
+        records['rotate'] = r
+
+        return records
+
+
+@register_keypointop
+class TopDownAffine(object):
+    """Apply an affine transform to the image and coords.
+
+    Args:
+        trainsize (list): [w, h], the standard size used to train
+        records (dict): the dict that contains the image and coords
+
+    Returns:
+        records (dict): the image and coords after the transform
+
+    """
+
+    def __init__(self, trainsize):
+        self.trainsize = trainsize
+
+    def __call__(self, records):
+        image = records['image']
+        joints = records['joints']
+        joints_vis = records['joints_vis']
+        rot = records['rotate'] if 'rotate' in records else 0
+        # the dataset normalized scale by pixel_std (200); undo it here
+        trans = get_affine_transform(records['center'], records['scale'] * 200,
+                                     rot, self.trainsize)
+        image = cv2.warpAffine(
+            image,
+            trans, (int(self.trainsize[0]), int(self.trainsize[1])),
+            flags=cv2.INTER_LINEAR)
+        for i in range(joints.shape[0]):
+            if joints_vis[i, 0] > 0.0:
+                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)
+        records['image'] = image
+        records['joints'] = joints
+
+        return records
+
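+
+# These top-down ops are meant to be chained in a reader config, together with
+# ToHeatmapsTopDown below, e.g. (a sketch; the exact keys and values are
+# assumptions, not part of this patch):
+#   sample_transforms:
+#     - RandomFlipHalfBodyTransform: { ... }
+#     - TopDownAffine: { trainsize: [192, 256] }
+#     - ToHeatmapsTopDown: { hmsize: [48, 64], sigma: 2 }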
+
+
+@register_keypointop
+class ToHeatmapsTopDown(object):
+    """Generate Gaussian heatmaps of the keypoints for the heatmap loss.
+
+    Args:
+        hmsize (list): [w, h] output heatmap's size
+        sigma (float): the std of the generated Gaussian kernel
+        records (dict): the dict that contains the image and coords
+
+    Returns:
+        records (dict): contains the heatmaps used by the heatmap loss
+
+    """
+
+    def __init__(self, hmsize, sigma):
+        super(ToHeatmapsTopDown, self).__init__()
+        self.hmsize = np.array(hmsize)
+        self.sigma = sigma
+
+    def __call__(self, records):
+        joints = records['joints']
+        joints_vis = records['joints_vis']
+        num_joints = joints.shape[0]
+        image_size = np.array(
+            [records['image'].shape[1], records['image'].shape[0]])
+        target_weight = np.ones((num_joints, 1), dtype=np.float32)
+        target_weight[:, 0] = joints_vis[:, 0]
+        target = np.zeros(
+            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
+        tmp_size = self.sigma * 3
+        for joint_id in range(num_joints):
+            feat_stride = image_size / self.hmsize
+            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
+            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
+            # Check that any part of the Gaussian is in-bounds
+            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
+            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
+            if (ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or
+                    br[0] < 0 or br[1] < 0):
+                # If not, mask this joint out and skip it
+                target_weight[joint_id] = 0
+                continue
+            # Generate the Gaussian
+            size = 2 * tmp_size + 1
+            x = np.arange(0, size, 1, np.float32)
+            y = x[:, np.newaxis]
+            x0 = y0 = size // 2
+            # The Gaussian is not normalized; we want the center value to be 1
+            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))
+
+            # Usable Gaussian range
+            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
+            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
+            # Image range
+            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
+            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
+
+            v = target_weight[joint_id]
+            if v > 0.5:
+                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
+                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+        records['target'] = target
+        records['target_weight'] = target_weight
+        del records['joints'], records['joints_vis']
+
+        return records
diff --git a/ppdet/engine/callbacks.py b/ppdet/engine/callbacks.py
index 0798b91f3216977577d3026f9bbfa22486b65950..ef145a414b640359c943b3617926c999cd9fbec4 100644
--- a/ppdet/engine/callbacks.py
+++ b/ppdet/engine/callbacks.py
@@ -166,7 +166,12 @@ class Checkpointer(Callback):
         if 'save_best_model' in status and status['save_best_model']:
             for metric in self.model._metrics:
                 map_res = metric.get_results()
-                key = 'bbox' if 'bbox' in map_res else 'mask'
+                if 'bbox' in map_res:
+                    key = 'bbox'
+                elif 'keypoint' in map_res:
+                    key = 'keypoint'
+                else:
+                    key = 'mask'
                 if key not in map_res:
                     logger.warn("Evaluation results empty, this may be due to " \
                                 "training iterations being too few or not " \
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index cafe933117cd1fbe9a236cbdee7ce4801f49f2d6..9308451406b8574185395c64235377e5fc59482b 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -37,10 +37,11 @@ TRT_MIN_SUBGRAPH = {
     'TTFNet': 3,
     'FCOS': 16,
     'SOLOv2': 60,
-    'HigherHrnet': 40,
+    'HigherHRNet': 3,
+    'HRNet': 3,
 }
 
-KEYPOINT_ARCH = ['HigherHrnet', 'Hrnet']
+KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
 
 
 def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 6b5d157fb3b8fafb4efaa82b83e6e2c74142a439..9a3f2738fb04a8ccb3781bdea917a28e5d198845 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -33,7 +33,7 @@ from ppdet.optimizer import ModelEMA
 from ppdet.core.workspace import create
 from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from ppdet.utils.visualizer import visualize_results, save_result
-from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results
+from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval
 from ppdet.data.source.category import get_categories
 import ppdet.utils.stats as stats
 
@@ -173,6 +173,15 @@ class Trainer(object):
                     anno_file=self.dataset.get_anno(),
                     multi_scale=multi_scale)
             ]
+        elif self.cfg.metric == 'KeyPointTopDownCOCOEval':
+            eval_dataset = self.cfg['EvalDataset']
+            eval_dataset.check_or_download_dataset()
+            anno_file = eval_dataset.get_anno()
+            self._metrics = [
+                KeyPointTopDownCOCOEval(anno_file,
+                                        len(eval_dataset), self.cfg.num_joints,
+                                        self.cfg.save_dir)
+            ]
         else:
             logger.warn("Metric not support for metric type {}".format(
                 self.cfg.metric))
@@ -374,6 +383,7 @@ class Trainer(object):
             self.status['step_id'] = step_id
             # forward
             outs = self.model(data)
+
             for key in ['im_shape', 'scale_factor', 'im_id']:
                 outs[key] = data[key]
             for key, value in outs.items():
diff --git a/ppdet/metrics/__init__.py b/ppdet/metrics/__init__.py
index 
9cadb1f68320ba69277a15370b290310db29a888..a12e6ed906840a129823c8103bb16d9c2302884d 100644 --- a/ppdet/metrics/__init__.py +++ b/ppdet/metrics/__init__.py @@ -13,7 +13,8 @@ # limitations under the License. from . import metrics - +from . import keypoint_metrics from .metrics import * +from .keypoint_metrics import * -__all__ = metrics.__all__ +__all__ = metrics.__all__ + keypoint_metrics.__all__ diff --git a/ppdet/metrics/keypoint_metrics.py b/ppdet/metrics/keypoint_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..5a033806f98109430756692cfb0d70fe0abffc4c --- /dev/null +++ b/ppdet/metrics/keypoint_metrics.py @@ -0,0 +1,204 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os +import json +from collections import OrderedDict +from collections import defaultdict +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from ..modeling.keypoint_utils import oks_nms + +__all__ = ['KeyPointTopDownCOCOEval'] + + +class KeyPointTopDownCOCOEval(object): + def __init__(self, + anno_file, + num_samples, + num_joints, + output_eval, + iou_type='keypoints', + in_vis_thre=0.2, + oks_thre=0.9): + super(KeyPointTopDownCOCOEval, self).__init__() + self.coco = COCO(anno_file) + self.num_samples = num_samples + self.num_joints = num_joints + self.iou_type = iou_type + self.in_vis_thre = in_vis_thre + self.oks_thre = oks_thre + self.output_eval = output_eval + self.res_file = os.path.join(output_eval, "keypoints_results.json") + self.reset() + + def reset(self): + self.results = { + 'all_preds': np.zeros( + (self.num_samples, self.num_joints, 3), dtype=np.float32), + 'all_boxes': np.zeros((self.num_samples, 6)), + 'image_path': [] + } + self.eval_results = {} + self.idx = 0 + + def update(self, inputs, outputs): + kpt_coord = outputs['kpt_coord'] + kpt_score = outputs['kpt_score'] + num_images = inputs['image'].shape[0] + self.results['all_preds'][self.idx:self.idx + num_images, :, 0: + 2] = kpt_coord[:, :, 0:2] + self.results['all_preds'][self.idx:self.idx + num_images, :, 2: + 3] = kpt_score + self.results['all_boxes'][self.idx:self.idx + num_images, 0:2] = inputs[ + 'center'].numpy()[:, 0:2] + self.results['all_boxes'][self.idx:self.idx + num_images, 2:4] = inputs[ + 'scale'].numpy()[:, 0:2] + self.results['all_boxes'][self.idx:self.idx + num_images, 4] = np.prod( + inputs['scale'].numpy() * 200, 1) + self.results['all_boxes'][self.idx:self.idx + num_images, + 5] = np.squeeze(inputs['score'].numpy()) + self.results['image_path'].extend(inputs['im_id'].numpy()) + + self.idx += num_images + + def _write_coco_keypoint_results(self, keypoints): + data_pack = [{ + 'cat_id': 1, + 'cls': 'person', + 'ann_type': 'keypoints', + 'keypoints': keypoints + }] + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + if not os.path.exists(self.output_eval): + os.makedirs(self.output_eval) + with open(self.res_file, 'w') as f: + json.dump(results, f, 
sort_keys=True, indent=4) + try: + json.load(open(self.res_file)) + except Exception: + content = [] + with open(self.res_file, 'r') as f: + for line in f: + content.append(line) + content[-1] = ']' + with open(self.res_file, 'w') as f: + for c in content: + f.write(c) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array( + [img_kpts[k]['keypoints'] for k in range(len(img_kpts))]) + _key_points = _key_points.reshape(_key_points.shape[0], -1) + + result = [{ + 'image_id': img_kpts[k]['image'], + 'category_id': cat_id, + 'keypoints': list(_key_points[k]), + 'score': img_kpts[k]['score'], + 'center': list(img_kpts[k]['center']), + 'scale': list(img_kpts[k]['scale']) + } for k in range(len(img_kpts))] + cat_results.extend(result) + + return cat_results + + def get_final_results(self, preds, all_boxes, img_path): + _kpts = [] + for idx, kpt in enumerate(preds): + _kpts.append({ + 'keypoints': kpt, + 'center': all_boxes[idx][0:2], + 'scale': all_boxes[idx][2:4], + 'area': all_boxes[idx][4], + 'score': all_boxes[idx][5], + 'image': int(img_path[idx]) + }) + # image x person x (keypoints) + kpts = defaultdict(list) + for kpt in _kpts: + kpts[kpt['image']].append(kpt) + + # rescoring and oks nms + num_joints = preds.shape[1] + in_vis_thre = self.in_vis_thre + oks_thre = self.oks_thre + oks_nmsed_kpts = [] + for img in kpts.keys(): + img_kpts = kpts[img] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > in_vis_thre: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))], + oks_thre) + + if len(keep) == 0: + oks_nmsed_kpts.append(img_kpts) + else: + oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) + + self._write_coco_keypoint_results(oks_nmsed_kpts) + + def accumulate(self): + self.get_final_results(self.results['all_preds'], + self.results['all_boxes'], + self.results['image_path']) + coco_dt = self.coco.loadRes(self.res_file) + coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + keypoint_stats = [] + for ind in range(len(coco_eval.stats)): + keypoint_stats.append((coco_eval.stats[ind])) + self.eval_results['keypoint'] = keypoint_stats + + def log(self): + stats_names = [ + 'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', + 'AR .75', 'AR (M)', 'AR (L)' + ] + num_values = len(stats_names) + print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |') + print('|---' * (num_values + 1) + '|') + + print(' '.join([ + '| {:.3f}'.format(value) for value in self.eval_results['keypoint'] + ]) + ' |') + + def get_results(self): + return self.eval_results diff --git a/ppdet/optimizer.py b/ppdet/optimizer.py index 1fcfbf856a9f0bb110607e2dfe0fd768d511a966..dc86023a60bbf33c2d4996f02e160d9fbe200e8d 100644 --- a/ppdet/optimizer.py +++ b/ppdet/optimizer.py @@ -139,10 +139,11 @@ class LinearWarmup(object): boundary = [] value = [] for i in range(self.steps + 1): - alpha = i / self.steps - factor = self.start_factor * (1 - alpha) + alpha - lr = base_lr * factor - value.append(lr) + if self.steps > 0: 
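+                # guard for steps == 0 (no warmup configured): the division
+                # below would otherwise raise ZeroDivisionError, and no
+                # warmup values need to be emitted in that case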
+ alpha = i / self.steps + factor = self.start_factor * (1 - alpha) + alpha + lr = base_lr * factor + value.append(lr) if i > 0: boundary.append(i) return boundary, value
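
A minimal usage sketch (not part of the patch) showing how the new
KeyPointTopDownCOCOEval metric is driven, mirroring the Trainer wiring above;
`model`, `eval_loader`, `eval_dataset`, and the annotation path are
hypothetical stand-ins:

    # inputs are paddle tensor batches produced by the top-down reader
    from ppdet.metrics import KeyPointTopDownCOCOEval

    metric = KeyPointTopDownCOCOEval(
        'annotations/person_keypoints_val2017.json',  # assumed COCO layout
        num_samples=len(eval_dataset),
        num_joints=17,
        output_eval='./output_eval')
    metric.reset()
    for inputs in eval_loader:
        outputs = model(inputs)  # must expose 'kpt_coord' and 'kpt_score'
        metric.update(inputs, outputs)
    metric.accumulate()  # writes keypoints_results.json, then runs COCOeval
    metric.log()  # prints the AP/AR table
    ap = metric.get_results()['keypoint'][0]  # COCO keypoint AP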