提交 edc6b362 编写于 作者: B baiyfbupt

kpi fix

上级 f35daf3e
###!/bin/bash
####This file is only used for continuous evaluation.
export MKL_NUM_THREADS=1 export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1 export OMP_NUM_THREADS=1
cudaid=${object_detection_cudaid:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then
mkdir -p /root/.cache/paddle/dataset/pascalvoc mkdir -p /root/.cache/paddle/dataset/pascalvoc
./data/pascalvoc/download.sh ./data/pascalvoc/download.sh
cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
fi fi
FLAGS_benchmark=true python train.py --for_model_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
cudaid=${object_detection_cudaid:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py
...@@ -8,15 +8,15 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -8,15 +8,15 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared in models in some way!!!! #### NOTE kpi.py should be shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, actived=True) test_acc_kpi = AccKpi('test_acc', 0.01, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True) train_speed_kpi = AccKpi('train_speed', 0.2, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True) train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, actived=True)
tracking_kpis = [ tracking_kpis = [
train_acc_kpi,
train_cost_kpi, train_cost_kpi,
test_acc_kpi, test_acc_kpi,
train_duration_kpi, train_speed_kpi,
train_speed_card4_kpi,
] ]
......
...@@ -11,11 +11,6 @@ import reader ...@@ -11,11 +11,6 @@ import reader
from mobilenet_ssd import mobile_net from mobilenet_ssd import mobile_net
from utility import add_arguments, print_arguments from utility import add_arguments, print_arguments
SEED = 90
# random seed must set before configuring the network.
fluid.default_startup_program().random_seed = SEED
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
...@@ -38,7 +33,7 @@ add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will ...@@ -38,7 +33,7 @@ add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.") add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('data_dir', str, 'data/pascalvoc', "data directory") add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('for_model_ce', bool, False, "Use CE to evaluate the model") add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
#yapf: enable #yapf: enable
...@@ -51,6 +46,9 @@ def train(args, ...@@ -51,6 +46,9 @@ def train(args,
num_passes, num_passes,
model_save_dir, model_save_dir,
pretrained_model=None): pretrained_model=None):
if args.enable_ce:
fluid.framework.default_startup_program().random_seed = 111
image_shape = [3, data_args.resize_h, data_args.resize_w] image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset: if 'coco' in data_args.dataset:
num_classes = 91 num_classes = 91
...@@ -124,8 +122,12 @@ def train(args, ...@@ -124,8 +122,12 @@ def train(args,
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name) use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch( if not args.enable_ce:
reader.train(data_args, train_file_list), batch_size=batch_size) train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch( test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size) reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder( feeder = fluid.DataFeeder(
...@@ -143,17 +145,20 @@ def train(args, ...@@ -143,17 +145,20 @@ def train(args,
def test(pass_id, best_map): def test(pass_id, best_map):
_, accum_map = map_eval.get_map_var() _, accum_map = map_eval.get_map_var()
map_eval.reset(exe) map_eval.reset(exe)
every_pass_map=[]
for batch_id, data in enumerate(test_reader()): for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program, test_map, = exe.run(test_program,
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[accum_map]) fetch_list=[accum_map])
if batch_id % 20 == 0: if batch_id % 20 == 0:
every_pass_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map)) print("Batch {0}, map {1}".format(batch_id, test_map))
mean_map = np.mean(every_pass_map)
if test_map[0] > best_map: if test_map[0] > best_map:
best_map = test_map[0] best_map = test_map[0]
save_model('best_model') save_model('best_model')
print("Pass {0}, test map {1}".format(pass_id, test_map)) print("Pass {0}, test map {1}".format(pass_id, test_map))
return best_map return best_map, mean_map
total_time = 0.0 total_time = 0.0
for pass_id in range(num_passes): for pass_id in range(num_passes):
...@@ -183,28 +188,24 @@ def train(args, ...@@ -183,28 +188,24 @@ def train(args,
pass_id, batch_id, loss_v, start_time - prev_start_time)) pass_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time() end_time = time.time()
if args.for_model_ce: best_map, mean_map = test(pass_id, best_map)
gpu_num = get_cards() if args.enable_ce and pass_id == 1:
total_time += end_time - start_time total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss) train_avg_loss = np.mean(every_pass_loss)
if gpu_num == 1: if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss) print ("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx)) print ("kpis train_speed %s" % (total_time / epoch_idx))
else: else:
print ("kpis train_cost_card%s %s" % (gpu_num, train_avg_loss)) print ("kpis train_cost_card%s %s" % (gpu_num, train_avg_loss))
print ("kpis test_acc_card%s %s" % (gpu_num, mean_map))
print ("kpis train_speed_card%s %f" % (gpu_num, total_time / epoch_idx)) print ("kpis train_speed_card%s %f" % (gpu_num, total_time / epoch_idx))
best_map = test(pass_id, best_map)
if pass_id % 10 == 0 or pass_id == num_passes - 1: if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id)) save_model(str(pass_id))
print("Best test map {0}".format(best_map)) print("Best test map {0}".format(best_map))
def get_cards():
cards = os.environ.get('CUDA_VISIBLE_DEVICES')
num = len(cards.split(","))
return num
if __name__ == '__main__': if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册