提交 b3937aeb 编写于 作者: Z zhengya01

Add CE (continuous evaluation) support for human_pose_estimation

上级 dde59008
#!/bin/bash
# Continuous-evaluation driver: runs the same short training job twice, first
# on a single card and then on four cards, piping each run's stdout into
# _ce.py.
export MKL_NUM_THREADS=1 OMP_NUM_THREADS=1

# Run one CE training pass on the comma-separated device list given as $1.
# Globals:   cudaid (written), CUDA_VISIBLE_DEVICES (exported)
# Arguments: $1 - value for CUDA_VISIBLE_DEVICES
# Returns:   exit status of the train.py | _ce.py pipeline
run_ce() {
  cudaid=$1
  export CUDA_VISIBLE_DEVICES="$cudaid"
  FLAGS_benchmark=true python train.py --dataset=coco --num_epochs=3 --batch_num=50 --enable_ce | python _ce.py
}

# ${name:=default} also assigns the default to the variable when it is unset
# or empty, so callers can override the card selection from the environment.
run_ce "${human_pose_estimation:=0}"         # use 0-th card as default
run_ce "${human_pose_estimation_4:=0,1,2,3}" # use 0,1,2,3 card as default
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI objects recorded by this script. Each name must match a name emitted on
# a "kpis\t<name>\t<value>" line of the training log, because log_to_ce looks
# the tracker up by that name.
# NOTE(review): the positional arguments (0.08, 0) are presumably a tolerance
# threshold and a number of leading records to skip -- confirm against the
# kpi module, which is not part of this file.
# Only the duration KPIs are created with actived=True; the loss KPIs keep the
# constructor's default.
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0)
# Every KPI that log_to_ce is allowed to record, indexed there by its .name.
tracking_kpis = [
each_pass_duration_card1_kpi,
train_loss_card1_kpi,
each_pass_duration_card4_kpi,
train_loss_card4_kpi,
]
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    A line is treated as a KPI record only when, after stripping surrounding
    whitespace, it has exactly three tab-separated fields and the first field
    is the literal word kpis:

        kpis<TAB>each_pass_duration_card1<TAB>12.5
        kpis<TAB>train_loss_card1<TAB>0.0021

    Every other line is ignored. The value field is converted with float(),
    so a non-numeric value raises ValueError.

    Args:
        log: the complete log text as one string.

    Yields:
        (name, value) tuples in the order they appear in the log, with the
        name as a string and the value as a float.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Echo every parsed line so the raw training log stays visible in the
        # output of this script.
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    """Record every KPI found in `log` on its matching tracker.

    Trackers are taken from the module-level `tracking_kpis` list and looked
    up by their `name`; a KPI name in the log with no matching tracker raises
    KeyError. Each record is echoed, added to its tracker and persisted
    immediately.
    """
    trackers_by_name = {tracker.name: tracker for tracker in tracking_kpis}
    for record in parse_log(log):
        name, value = record
        print(name, value)
        tracker = trackers_by_name[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin (this script sits at the end of a
    # pipe); read all of it and hand it to the KPI recorder.
    log_to_ce(sys.stdin.read())
......@@ -17,6 +17,7 @@
import os
import numpy as np
import time
import cv2
import paddle
import paddle.fluid as fluid
......@@ -42,6 +43,8 @@ add_arg('pretrained_model', str, None, "Whether to use pretrai
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 0.001, "Set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
parser.add_argument('--batch_num', type=int, help="batch num for ce")
# yapf: enable
def optimizer_setting(args, params):
......@@ -93,6 +96,11 @@ def train(args):
print_arguments(args)
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
fluid.default_startup_program().random_seed = SEED
# Image and target
image = layers.data(name='image', shape=[3, IMAGE_SIZE[1], IMAGE_SIZE[0]], dtype='float32')
target = layers.data(name='target', shape=[args.kp_dim, HEATMAP_SIZE[1], HEATMAP_SIZE[0]], dtype='float32')
......@@ -137,14 +145,21 @@ def train(args):
# Dataloader
train_reader = paddle.batch(reader.train(), batch_size=args.batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, target, target_weight])
train_exe = fluid.ParallelExecutor(
use_cuda=True if args.use_gpu else False, loss_name=loss.name)
fetch_list = [image.name, loss.name, output.name]
total_time = 0
last_loss = 0
for pass_id in range(params["num_epochs"]):
for batch_id, data in enumerate(train_reader()):
if args.enable_ce and args.batch_num is not None:
if batch_id >= args.batch_num:
break
start_time = time.time()
current_lr = np.array(paddle.fluid.global_scope().find_var('learning_rate').get_tensor())
input_image, loss, out_heatmaps = train_exe.run(
......@@ -152,6 +167,10 @@ def train(args):
loss = np.mean(np.array(loss))
end_time = time.time()
total_time += end_time - start_time
last_loss = loss
print('Epoch [{:4d}/{:3d}] LR: {:.10f} '
'Loss = {:.5f}'.format(
batch_id, pass_id, current_lr[0], loss))
......@@ -159,11 +178,26 @@ def train(args):
if batch_id % 10 == 0:
save_batch_heatmaps(input_image, out_heatmaps, file_name='visualization@train.jpg', normalize=True)
model_path = os.path.join(args.model_save_dir + '/' + 'simplebase-{}'.format(args.dataset),
str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
# only for ce
if args.enable_ce:
epoch_idx = params["num_epochs"]
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_loss_card%s\t%s" %
(gpu_num, last_loss))
def get_cards(args):
    """Return the number of device ids listed in CUDA_VISIBLE_DEVICES.

    Args:
        args: parsed command-line arguments; accepted for interface
            compatibility and not read here.

    Returns:
        The count of non-empty, comma-separated entries in the
        CUDA_VISIBLE_DEVICES environment variable. When the variable is unset,
        empty, or contains no ids, 1 is returned so that the KPI name built
        from this value is still well-formed instead of the call failing.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    if not visible:
        # Unset (None) or empty: nothing to split, fall back to one card.
        return 1
    # Ignore blank entries produced by stray or trailing commas.
    device_ids = [dev for dev in visible.split(",") if dev.strip()]
    return len(device_ids) or 1
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册