未验证 提交 b1264be0 编写于 作者: G guochaorong 提交者: GitHub

Merge branch 'develop' into language_model_for_ce

......@@ -2,4 +2,4 @@
####This file is only used for continuous evaluation.
model_file='train.py'
python $model_file --pass_num 1 --learning_rate 0.001 --save_interval 10 --enable_ce
python $model_file --pass_num 1 --learning_rate 0.001 --save_interval 10 --enable_ce | python _ce.py
cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${object_detection_cudaid:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then
mkdir -p /root/.cache/paddle/dataset/pascalvoc
./data/pascalvoc/download.sh
bash ./.move.sh
fi
FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 --for_model_ce=True --data_dir=/root/.cache/paddle/dataset/pascalvoc/
###!/bin/bash
####This file is only used for continuous evaluation.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then
mkdir -p /root/.cache/paddle/dataset/pascalvoc
./data/pascalvoc/download.sh
cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
fi
cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
cudaid=${object_detection_cudaid:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
####this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, actived=True)
train_speed_kpi = AccKpi('train_speed', 0.2, actived=True)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, actived=True)
tracking_kpis = [
train_cost_kpi,
test_acc_kpi,
train_speed_kpi,
train_cost_card4_kpi,
test_acc_card4_kpi,
train_speed_card4_kpi,
]
def parse_log(log):
'''
This method should be implemented by model developers.
The suggestion:
each line in the log should be key, value, for example:
"
train_cost\t1.0
test_cost\t1.0
train_cost\t1.0
train_cost\t1.0
train_acc\t1.2
"
'''
#kpi_map = {}
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
print("-----%s" % fs)
kpi_name = fs[1]
kpi_value = float(fs[2])
#kpi_map[kpi_name] = kpi_value
yield kpi_name, kpi_value
#return kpi_map
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
print("*****")
print(log)
print("****")
log_to_ce(log)
......@@ -23,7 +23,7 @@ add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalv
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort', bool, True, "Whether apply distort.")
add_arg('apply_expand', bool, True, "Whether appley expand.")
add_arg('apply_expand', bool, True, "Whether apply expand.")
add_arg('nms_threshold', float, 0.45, "NMS threshold.")
add_arg('ap_version', str, '11point', "integral, 11point.")
add_arg('resize_h', int, 300, "The resized image height.")
......@@ -32,10 +32,8 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will
add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('for_model_ce', bool, False, "Use CE to evaluate the model")
add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('skip_batch_num', int, 5, "the num of minibatch to skip.")
add_arg('iterations', int, 120, "mini batchs.")
add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
#yapf: enable
......@@ -48,6 +46,9 @@ def train(args,
num_passes,
model_save_dir,
pretrained_model=None):
if args.enable_ce:
fluid.framework.default_startup_program().random_seed = 111
image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset:
num_classes = 91
......@@ -121,8 +122,12 @@ def train(args,
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
if not args.enable_ce:
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
......@@ -140,32 +145,32 @@ def train(args,
def test(pass_id, best_map):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
every_pass_map=[]
for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if batch_id % 20 == 0:
every_pass_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map))
mean_map = np.mean(every_pass_map)
if test_map[0] > best_map:
best_map = test_map[0]
save_model('best_model')
print("Pass {0}, test map {1}".format(pass_id, test_map))
return best_map
return best_map, mean_map
train_num = 0
total_train_time = 0.0
total_time = 0.0
for pass_id in range(num_passes):
epoch_idx = pass_id + 1
start_time = time.time()
prev_start_time = start_time
# end_time = 0
every_pass_loss = []
iter = 0
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
if args.for_model_ce and iter == args.iterations:
break
if len(data) < (devices_num * 2):
print("There are too few data to train on all devices.")
continue
......@@ -176,34 +181,31 @@ def train(args,
loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
# end_time = time.time()
loss_v = np.mean(np.array(loss_v))
every_pass_loss.append(loss_v)
if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time))
if args.for_model_ce and iter >= args.skip_batch_num or pass_id != 0:
batch_duration = time.time() - start_time
pass_duration += batch_duration
train_num += len(data)
every_pass_loss.append(loss_v)
iter += 1
total_train_time += pass_duration
if args.for_model_ce and pass_id == num_passes - 1:
examples_per_sec = train_num / total_train_time
cost = np.mean(every_pass_loss)
with open("train_speed_factor.txt", 'w') as f:
f.write('{:f}\n'.format(examples_per_sec))
with open("train_cost_factor.txt", 'a+') as f:
f.write('{:f}\n'.format(cost))
best_map = test(pass_id, best_map)
end_time = time.time()
best_map, mean_map = test(pass_id, best_map)
if args.enable_ce and pass_id == 1:
total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx))
else:
print ("kpis train_cost_card%s %s" % (devices_num, train_avg_loss))
print ("kpis test_acc_card%s %s" % (devices_num, mean_map))
print ("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx))
if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id))
print("Best test map {0}".format(best_map))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册