Commit 35b8511e authored by zhengya01

add ce for human_pose_estimation

Parent a5ededde
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${face_detection:=7} # use card 7 as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --dataset=coco --num_epochs=2 --batch_num=10 --enable_ce | python _ce.py
cudaid=${face_detection_m:=4,5,6,7} # use cards 4,5,6,7 as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --dataset=coco --num_epochs=2 --batch_num=10 --enable_ce | python _ce.py
# this file is only used for continuous evaluation test!

import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi

each_pass_duration_card1_kpi = DurationKpi(
    'each_pass_duration_card1', 0.08, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0)
each_pass_duration_card4_kpi = DurationKpi(
    'each_pass_duration_card4', 0.08, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0)

tracking_kpis = [
    each_pass_duration_card1_kpi,
    train_loss_card1_kpi,
    each_pass_duration_card4_kpi,
    train_loss_card4_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.

    Each KPI line in the log must contain three tab-separated fields:
    the literal prefix "kpis", the KPI name, and its value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    Lines in any other format are ignored.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
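For reference, a minimal sketch of how the parsing above behaves, using the parse_log defined in this file; the sample log is fabricated, and only tab-separated "kpis\t<name>\t<value>" lines are yielded:

# Fabricated log: one ordinary training line and two KPI lines.
# parse_log echoes each split line, then yields (name, value) pairs
# only for lines of the form "kpis\t<name>\t<value>".
sample_log = (
    'Epoch [  10/  0] LR: 0.0010000000 Loss = 0.00023\n'
    'kpis\ttrain_loss_card1\t0.00023\n'
    'kpis\teach_pass_duration_card1\t1.73'
)
for name, value in parse_log(sample_log):
    print(name, value)
# -> train_loss_card1 0.00023
# -> each_pass_duration_card1 1.73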
# Copyright (c) 2018-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Data reader for COCO dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import functools
import numpy as np
import cv2
import random
from utils.transforms import fliplr_joints
from utils.transforms import get_affine_transform
from utils.transforms import affine_transform
from lib.base_reader import visualize, generate_target
from pycocotools.coco import COCO
# NOTE
# -- COCO Datatset --
# "keypoints":
# {
# 0: "nose",
# 1: "left_eye",
# 2: "right_eye",
# 3: "left_ear",
# 4: "right_ear",
# 5: "left_shoulder",
# 6: "right_shoulder",
# 7: "left_elbow",
# 8: "right_elbow",
# 9: "left_wrist",
# 10: "right_wrist",
# 11: "left_hip",
# 12: "right_hip",
# 13: "left_knee",
# 14: "right_knee",
# 15: "left_ankle",
# 16: "right_ankle"
# },
#
# "skeleton":
# [
# [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8],
# [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]
# ]
class Config:
    """Configurations for COCO dataset.
    """
    DEBUG = False
    TMPDIR = 'tmp_fold_for_debug'

    # For reader
    BUF_SIZE = 102400
    THREAD = 1 if DEBUG else 8  # have to be larger than 0

    # Fixed infos of dataset
    DATAROOT = 'data/coco'
    IMAGEDIR = 'images'
    NUM_JOINTS = 17
    FLIP_PAIRS = [[1, 2], [3, 4], [5, 6], [7, 8],
                  [9, 10], [11, 12], [13, 14], [15, 16]]
    PARENT_IDS = None

    # CFGS
    SCALE_FACTOR = 0.3
    ROT_FACTOR = 40
    FLIP = True
    TARGET_TYPE = 'gaussian'
    SIGMA = 3
    IMAGE_SIZE = [288, 384]
    HEATMAP_SIZE = [72, 96]
    ASPECT_RATIO = IMAGE_SIZE[0] * 1.0 / IMAGE_SIZE[1]
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]
    PIXEL_STD = 200

cfg = Config()
def _box2cs(box):
    x, y, w, h = box[:4]
    return _xywh2cs(x, y, w, h)


def _xywh2cs(x, y, w, h):
    center = np.zeros((2), dtype=np.float32)
    center[0] = x + w * 0.5
    center[1] = y + h * 0.5

    if w > cfg.ASPECT_RATIO * h:
        h = w * 1.0 / cfg.ASPECT_RATIO
    elif w < cfg.ASPECT_RATIO * h:
        w = h * cfg.ASPECT_RATIO
    scale = np.array(
        [w * 1.0 / cfg.PIXEL_STD, h * 1.0 / cfg.PIXEL_STD],
        dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
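As a worked example of this center/scale convention (hand-computed from the Config defaults above; the box values are made up): a box is padded to the 288/384 = 0.75 aspect ratio, expressed in units of PIXEL_STD = 200 pixels, then enlarged by 1.25:

# Hypothetical box [x, y, w, h] = [100, 50, 100, 200]:
#   center = (100 + 50, 50 + 100) = (150.0, 150.0)
#   w = 100 < ASPECT_RATIO * h = 0.75 * 200 = 150  -> widen: w = 150
#   scale = (150 / 200, 200 / 200) = (0.75, 1.0)
#   enlarged by 1.25                -> (0.9375, 1.25)
center, scale = _box2cs([100, 50, 100, 200])
print(center)  # [150. 150.]
print(scale)   # [0.9375 1.25  ]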
def _select_data(db):
    db_selected = []
    for rec in db:
        num_vis = 0
        joints_x = 0.0
        joints_y = 0.0
        for joint, joint_vis in zip(
                rec['joints_3d'], rec['joints_3d_vis']):
            if joint_vis[0] <= 0:
                continue
            num_vis += 1

            joints_x += joint[0]
            joints_y += joint[1]
        if num_vis == 0:
            continue

        joints_x, joints_y = joints_x / num_vis, joints_y / num_vis

        area = rec['scale'][0] * rec['scale'][1] * (cfg.PIXEL_STD**2)
        joints_center = np.array([joints_x, joints_y])
        bbox_center = np.array(rec['center'])
        diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2)
        ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area))

        metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16
        if ks > metric:
            db_selected.append(rec)

    print('=> num db: {}'.format(len(db)))
    print('=> num selected db: {}'.format(len(db_selected)))
    return db_selected
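The selection rule is a Gaussian similarity between the keypoint centroid and the box center, ks = exp(-d^2 / (2 * 0.2^2 * area)), kept when ks exceeds a threshold that grows linearly from 0.45 at one visible joint to 0.65 at all 17. A quick numeric check with assumed values:

import numpy as np

# Assumed record: scale (0.75, 1.0) -> area = 0.75 * 1.0 * 200**2 px^2,
# keypoint centroid 30 px away from the box center, 10 visible joints.
area = 0.75 * 1.0 * 200**2                    # 30000.0
d = 30.0
ks = np.exp(-d**2 / (2.0 * 0.2**2 * area))    # exp(-0.375) ~= 0.687
metric = (0.2 / 16) * 10 + 0.45 - 0.2 / 16    # 0.5625
print(ks > metric)                            # True -> record kept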
def _load_coco_keypoint_annotation(image_set_index, coco,
                                   _coco_ind_to_class_ind, image_set):
    """Ground truth bbox and keypoints.
    """
    print('generating coco gt_db...')
    gt_db = []
    for index in image_set_index:
        im_ann = coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = coco.loadAnns(annIds)

        # Sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            cls = _coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue

            # Ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            joints_3d = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
            joints_3d_vis = np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float)
            for ipt in range(cfg.NUM_JOINTS):
                joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                joints_3d[ipt, 2] = 0
                t_vis = obj['keypoints'][ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            center, scale = _box2cs(obj['clean_bbox'][:4])
            rec.append({
                'image': os.path.join(cfg.DATAROOT, cfg.IMAGEDIR,
                                      image_set+'2017', '%012d.jpg' % index),
                'center': center,
                'scale': scale,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'filename': '%012d.jpg' % index,
                'imgnum': 0,
            })
        gt_db.extend(rec)

    return gt_db
def data_augmentation(sample, is_train):
    image_file = sample['image']
    filename = sample['filename'] if 'filename' in sample else ''
    joints = sample['joints_3d']
    joints_vis = sample['joints_3d_vis']
    c = sample['center']
    s = sample['scale']
    # score = sample['score'] if 'score' in sample else 1
    # imgnum = sample['imgnum'] if 'imgnum' in sample else ''
    r = 0

    data_numpy = cv2.imread(
        image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

    if is_train:
        sf = cfg.SCALE_FACTOR
        rf = cfg.ROT_FACTOR
        s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
            if random.random() <= 0.6 else 0

        if cfg.FLIP and random.random() <= 0.5:
            data_numpy = data_numpy[:, ::-1, :]
            joints, joints_vis = fliplr_joints(
                joints, joints_vis, data_numpy.shape[1], cfg.FLIP_PAIRS)
            c[0] = data_numpy.shape[1] - c[0] - 1

    trans = get_affine_transform(c, s, r, cfg.IMAGE_SIZE)
    input = cv2.warpAffine(
        data_numpy,
        trans,
        (int(cfg.IMAGE_SIZE[0]), int(cfg.IMAGE_SIZE[1])),
        flags=cv2.INTER_LINEAR)

    for i in range(cfg.NUM_JOINTS):
        if joints_vis[i, 0] > 0.0:
            joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

    # Numpy target
    target, target_weight = generate_target(cfg, joints, joints_vis)

    if cfg.DEBUG:
        visualize(cfg, filename, data_numpy, input.copy(), joints, target)

    # Normalization
    input = input.astype('float32').transpose((2, 0, 1)) / 255
    input -= np.array(cfg.MEAN).reshape((3, 1, 1))
    input /= np.array(cfg.STD).reshape((3, 1, 1))

    return input, target, target_weight
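For orientation, a sketch of the output contract of data_augmentation under the defaults above. The record is made up, the sketch assumes the referenced COCO image exists locally, and exact target values come from generate_target in lib.base_reader; the shapes follow from IMAGE_SIZE = [288, 384] and HEATMAP_SIZE = [72, 96], both ordered [width, height]:

# Hypothetical record in the gt_db format built above.
sample = {
    'image': 'data/coco/images/val2017/000000000139.jpg',
    'filename': '000000000139.jpg',
    'joints_3d': np.zeros((cfg.NUM_JOINTS, 3), dtype=np.float32),
    'joints_3d_vis': np.ones((cfg.NUM_JOINTS, 3), dtype=np.float32),
    'center': np.array([320.0, 240.0], dtype=np.float32),
    'scale': np.array([1.0, 1.25], dtype=np.float32),
}
inp, target, target_weight = data_augmentation(sample, is_train=False)
print(inp.shape)     # (3, 384, 288): CHW image, mean/std normalized
print(target.shape)  # (17, 96, 72): one Gaussian heatmap per joint
# target_weight carries the per-joint visibility weights.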
# Create a reader
def _reader_creator(root, image_set, shuffle=False,
                    is_train=False, use_gt_bbox=False):

    def reader():
        if image_set in ['train', 'val']:
            file_name = os.path.join(
                root, 'annotations',
                'person_keypoints_'+image_set+'2017.json')
        elif image_set in ['test', 'test-dev']:
            file_name = os.path.join(
                root, 'annotations',
                'image_info_'+image_set+'2017.json')
        else:
            raise ValueError(
                "The dataset '{}' is not supported".format(image_set))

        # Load annotations
        coco = COCO(file_name)

        # Deal with class names
        cats = [cat['name']
                for cat in coco.loadCats(coco.getCatIds())]
        classes = ['__background__'] + cats
        print('=> classes: {}'.format(classes))
        num_classes = len(classes)
        _class_to_ind = dict(zip(classes, range(num_classes)))
        _class_to_coco_ind = dict(zip(cats, coco.getCatIds()))
        _coco_ind_to_class_ind = dict([(_class_to_coco_ind[cls],
                                        _class_to_ind[cls])
                                       for cls in classes[1:]])

        # Load image file names
        image_set_index = coco.getImgIds()
        num_images = len(image_set_index)
        print('=> num_images: {}'.format(num_images))

        if is_train or use_gt_bbox:
            gt_db = _load_coco_keypoint_annotation(
                image_set_index, coco, _coco_ind_to_class_ind, image_set)
            gt_db = _select_data(gt_db)

        if shuffle:
            random.shuffle(gt_db)

        for db in gt_db:
            yield db

    mapper = functools.partial(data_augmentation, is_train=is_train)

    return reader, mapper
def train():
    reader, mapper = _reader_creator(
        cfg.DATAROOT, 'train', shuffle=True, is_train=True)

    def pop():
        for i, x in enumerate(reader()):
            yield mapper(x)

    return pop


def train_ce():
    reader, mapper = _reader_creator(cfg.DATAROOT, 'train', use_gt_bbox=True)

    def pop():
        for i, x in enumerate(reader()):
            yield mapper(x)

    return pop
def valid():
    # use_gt_bbox=True is required here: with is_train=False and
    # use_gt_bbox=False, _reader_creator never builds gt_db.
    reader, mapper = _reader_creator(
        cfg.DATAROOT, 'val', shuffle=False, is_train=False, use_gt_bbox=True)

    def pop():
        for i, x in enumerate(reader()):
            yield mapper(x)

    return pop
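A minimal sketch of how these readers are consumed, mirroring what train.py does below; the batch size here is illustrative:

import paddle
import lib.coco_reader_ce as reader_ce

# train_ce() deliberately skips shuffling so CE runs see samples in a
# fixed order; paddle.batch groups them into fixed-size batches.
train_reader = paddle.batch(reader_ce.train_ce(), batch_size=32)
for batch in train_reader():
    inp, target, target_weight = batch[0]  # one (input, target, weight) tuple
    break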
@@ -17,6 +17,7 @@
import os
import numpy as np
import time
import cv2
import paddle
import paddle.fluid as fluid
@@ -42,6 +43,8 @@ add_arg('pretrained_model', str, None, "Whether to use pretrai
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.001, "Set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
parser.add_argument('--batch_num', type=int, help="batch num for ce")
# yapf: enable
def optimizer_setting(args, params):
@@ -93,6 +96,11 @@ def train(args):
    print_arguments(args)

    if args.enable_ce:
        SEED = 102
        fluid.default_main_program().random_seed = SEED
        fluid.default_startup_program().random_seed = SEED

    # Image and target
    image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32')
    target = layers.data(name='target', shape=[args.kp_dim, HEATMAP_SIZE[1], HEATMAP_SIZE[0]], dtype='float32')
@@ -137,14 +145,23 @@ def train(args):
    # Dataloader
    train_reader = paddle.batch(reader.train(), batch_size=args.batch_size)
    if args.enable_ce:
        import lib.coco_reader_ce as reader_ce
        train_reader = paddle.batch(reader_ce.train_ce(), batch_size=args.batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, target, target_weight])

    train_exe = fluid.ParallelExecutor(
        use_cuda=True if args.use_gpu else False, loss_name=loss.name)
    fetch_list = [image.name, loss.name, output.name]

    total_time = 0
    last_loss = 0
    for pass_id in range(params["num_epochs"]):
        for batch_id, data in enumerate(train_reader()):
            if args.enable_ce and args.batch_num is not None:
                if batch_id >= args.batch_num:
                    break
            start_time = time.time()
            current_lr = np.array(paddle.fluid.global_scope().find_var('learning_rate').get_tensor())

            input_image, loss, out_heatmaps = train_exe.run(
@@ -152,6 +169,10 @@
            loss = np.mean(np.array(loss))
            end_time = time.time()
            total_time += end_time - start_time
            last_loss = loss

            print('Epoch [{:4d}/{:3d}] LR: {:.10f} '
                  'Loss = {:.5f}'.format(
                      batch_id, pass_id, current_lr[0], loss))
@@ -159,11 +180,29 @@
            if batch_id % 10 == 0:
                save_batch_heatmaps(input_image, out_heatmaps, file_name='visualization@train.jpg', normalize=True)

        model_path = os.path.join(args.model_save_dir + '/' + 'simplebase-{}'.format(args.dataset),
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)

    # only for ce
    if args.enable_ce:
        epoch_idx = params["num_epochs"]
        gpu_num = get_cards(args)
        print("kpis\teach_pass_duration_card%s\t%s" %
              (gpu_num, total_time / epoch_idx))
        print("kpis\ttrain_loss_card%s\t%s" %
              (gpu_num, last_loss))


def get_cards(args):
    if args.enable_ce:
        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
        num = len(cards.split(","))
        return num
    else:
        return args.num_devices
if __name__ == '__main__':
......
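Putting the pieces together, a hedged sketch of the CE round trip (the numeric values are illustrative): train.py prints kpis\t<name>\t<value> lines, get_cards derives the card count from CUDA_VISIBLE_DEVICES, and _ce.py maps each name onto the KPI objects declared at its top:

import os

# With the single-card setting from the shell script above,
# CUDA_VISIBLE_DEVICES has one entry, so KPI names end in "card1";
# with "4,5,6,7" they end in "card4" and match the card4 KPIs.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
gpu_num = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))  # 1

# train.py then emits lines like these (made-up numbers) ...
print("kpis\teach_pass_duration_card%s\t%s" % (gpu_num, 12.34))
print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, 0.00042))
# ... which _ce.py reads from stdin and persists via
# kpi_tracker[name].add_record(value) and kpi_tracker[name].persist().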