提交 b199f192 编写于 作者: Y Yibing Liu

Merge branch 'develop' of upstream into text_cls_ce

#!/bin/bash
# Continuous-evaluation (CE) driver for the language model: trains once on a
# single card and once on several cards, piping each training log into
# _ce.py, which extracts the KPI records.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# train.py prints the "kpis" records only when --enable_ce is given, so the
# flag must be passed for _ce.py to receive anything to record.

# Single-card run.
cudaid=${language_model:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py

# Multi-card run. A separate variable is used because ":=" above has already
# assigned language_model, so reusing it would never pick up this default.
cudaid=${language_model_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the language-model CE job. Each tracker's name must equal
# the KPI name carried by a "kpis" log record, because log_to_ce() looks the
# tracker up by that name.
# NOTE(review): the two positional numbers are presumably a tolerance
# threshold and a count of leading records to skip -- confirm against kpi.py.

# Single-card run: average perplexity and per-pass duration.
imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0)
imikolov_20_pass_duration_kpi = DurationKpi(
'imikolov_20_pass_duration', 0.02, 0, actived=True)
# Multi-card run: same metrics, reported under a "_card4" suffix.
imikolov_20_avg_ppl_kpi_card4 = CostKpi('imikolov_20_avg_ppl_card4', 0.2, 0)
imikolov_20_pass_duration_kpi_card4 = DurationKpi(
'imikolov_20_pass_duration_card4', 0.03, 0, actived=True)
# Every tracker that log_to_ce() is allowed to update.
tracking_kpis = [
imikolov_20_avg_ppl_kpi,
imikolov_20_pass_duration_kpi,
imikolov_20_avg_ppl_kpi_card4,
imikolov_20_pass_duration_kpi_card4,
]
def parse_log(log):
    '''
    Extract KPI records from a training log.

    A record is a line made of exactly three fields whose first field is the
    literal tag "kpis", for example (fields separated by a TAB character):

        kpis  train_cost  1.0

    TAB is the canonical separator. A line that does not split into three
    TAB-separated fields is split again on arbitrary whitespace, so records
    printed with plain spaces ("kpis train_cost 1.0") are accepted as well.
    All other lines are ignored.

    Args:
        log: the complete training log as a single string.

    Yields:
        (kpi_name, kpi_value) tuples in log order; kpi_value is a float.

    Raises:
        ValueError: if the third field of a record is not a valid float.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Echo the TAB-split fields of every log line to stdout.
        print(fs)
        if len(fs) != 3:
            # Not a TAB-separated record: retry with any-whitespace splitting
            # so space-separated "kpis name value" lines are not dropped.
            fs = line.split()
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    """Record every KPI found in `log` on its tracker and persist it.

    Trackers are taken from the module-level `tracking_kpis` list and are
    looked up by their `name`; a KPI name without a tracker raises KeyError.
    """
    trackers = {kpi.name: kpi for kpi in tracking_kpis}

    for (name, value) in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin; read it in full and record its KPIs.
    log_to_ce(sys.stdin.read())
import os
import sys
import time
import numpy as np
import math
import argparse
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
import utils
SEED = 102
def parse_args(argv=None):
    """Parse the command-line options of the language-model benchmark.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse reads the process arguments, so existing
            argument-less calls behave as before.

    Returns:
        argparse.Namespace whose `enable_ce` attribute is True when
        `--enable_ce` was given and False otherwise.
    """
    parser = argparse.ArgumentParser("language_model benchmark.")
    # Adjacent literals are used instead of a backslash continuation inside
    # the string, which would embed the source indentation in the help text.
    parser.add_argument(
        '--enable_ce',
        action='store_true',
        help='If set, run '
        'the task with continuous evaluation logs.')
    return parser.parse_args(argv)
def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
""" network definition """
......@@ -63,31 +77,26 @@ def train(train_reader,
init_low_bound=-0.04,
init_high_bound=0.04):
""" train network """
args = parse_args()
if args.enable_ce:
# random seed must set before configuring the network.
fluid.default_startup_program().random_seed = SEED
vocab_size = len(vocab)
#Input data
src_wordseq = fluid.layers.data(
name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
dst_wordseq = fluid.layers.data(
name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)
# Train program
avg_cost = None
if not parallel:
cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
init_low_bound, init_high_bound)
avg_cost = fluid.layers.mean(x=cost)
else:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
cost = network(
pd.read_input(src_wordseq),
pd.read_input(dst_wordseq), vocab_size, hid_size,
init_low_bound, init_high_bound)
pd.write_output(cost)
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
init_low_bound, init_high_bound)
avg_cost = fluid.layers.mean(x=cost)
# Optimization to minimize loss
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=base_lr,
......@@ -96,39 +105,56 @@ def train(train_reader,
staircase=True))
sgd_optimizer.minimize(avg_cost)
# Initialize executor
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
total_time = 0.0
fetch_list = [avg_cost.name]
for pass_idx in xrange(pass_num):
epoch_idx = pass_idx + 1
print "epoch_%d start" % epoch_idx
t0 = time.time()
i = 0
newest_ppl = 0
for data in train_reader():
i += 1
lod_src_wordseq = utils.to_lodtensor(
map(lambda x: x[0], data), place)
lod_dst_wordseq = utils.to_lodtensor(
map(lambda x: x[1], data), place)
ret_avg_cost = exe.run(fluid.default_main_program(),
feed={
"src_wordseq": lod_src_wordseq,
"dst_wordseq": lod_dst_wordseq
},
fetch_list=[avg_cost],
use_program_cache=True)
avg_ppl = math.exp(ret_avg_cost[0])
ret_avg_cost = train_exe.run(feed={
"src_wordseq": lod_src_wordseq,
"dst_wordseq": lod_dst_wordseq
},
fetch_list=fetch_list)
avg_ppl = np.exp(ret_avg_cost[0])
newest_ppl = np.mean(avg_ppl)
if i % 100 == 0:
print "step:%d ppl:%.3f" % (i, avg_ppl)
print "step:%d ppl:%.3f" % (i, newest_ppl)
t1 = time.time()
total_time += t1 - t0
print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx)
if pass_idx == pass_num - 1 and args.enable_ce:
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
gpu_num = get_cards()
if gpu_num == 1:
print("kpis imikolov_20_pass_duration %s" %
(total_time / epoch_idx))
print("kpis imikolov_20_avg_ppl %s" % newest_ppl)
else:
print("kpis imikolov_20_pass_duration_card%s %s" % \
(gpu_num, total_time / epoch_idx))
print("kpis imikolov_20_avg_ppl_card%s %s" %
(gpu_num, newest_ppl))
save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
feed_var_names = ["src_wordseq", "dst_wordseq"]
fetch_vars = [avg_cost]
......@@ -138,11 +164,22 @@ def train(train_reader,
print("finish training")
def get_cards(enable_ce=False):
    """Return the number of GPU cards the training run should use.

    Args:
        enable_ce: when True, the count is taken from the comma-separated
            CUDA_VISIBLE_DEVICES environment variable. Defaults to False so
            that an argument-less call `get_cards()` is valid.

    Returns:
        int: the number of non-empty entries in CUDA_VISIBLE_DEVICES when
        `enable_ce` is True and the variable lists at least one device;
        otherwise the value of fluid.core.get_cuda_device_count().
    """
    if enable_ce:
        # The variable may be unset or empty; treat both as "not specified"
        # instead of failing on None.split().
        cards = os.environ.get('CUDA_VISIBLE_DEVICES') or ''
        visible = [card for card in cards.split(",") if card.strip()]
        if visible:
            return len(visible)
    return fluid.core.get_cuda_device_count()
def train_net():
""" do training """
batch_size = 20
args = parse_args()
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=batch_size, buffer_size=1000, word_freq_threshold=0)
batch_size=batch_size * get_cards(args.enable_ce), buffer_size=1000, \
word_freq_threshold=0, enable_ce = args.enable_ce)
train(
train_reader=train_reader,
vocab=vocab,
......@@ -152,7 +189,7 @@ def train_net():
batch_size=batch_size,
pass_num=12,
use_cuda=True,
parallel=False,
parallel=True,
model_dir="model",
init_low_bound=-0.1,
init_high_bound=0.1)
......
......@@ -3,7 +3,7 @@ import time
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
def to_lodtensor(data, place):
......@@ -22,17 +22,28 @@ def to_lodtensor(data, place):
return res
def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0):
def prepare_data(batch_size,
buffer_size=1000,
word_freq_threshold=0,
enable_ce=False):
""" prepare the English Penn Treebank (PTB) data """
vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
train_reader = paddle.batch(
paddle.reader.shuffle(
if enable_ce:
train_reader = paddle.batch(
paddle.dataset.imikolov.train(
vocab,
buffer_size,
data_type=paddle.dataset.imikolov.DataType.SEQ),
buf_size=buffer_size),
batch_size)
batch_size)
else:
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imikolov.train(
vocab,
buffer_size,
data_type=paddle.dataset.imikolov.DataType.SEQ),
buf_size=buffer_size),
batch_size)
test_reader = paddle.batch(
paddle.dataset.imikolov.test(
vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
......
......@@ -7,9 +7,9 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared among models in some way!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
tracking_kpis = [
train_cost_kpi,
......
......@@ -151,9 +151,9 @@ def train():
# This log is for continuous evaluation only
if args.enable_ce:
print("kpis train_cost %f" % avg_cost_train)
print("kpis test_cost %f" % test_loss)
print("kpis train_duration %f" % time_consumed)
print("kpis\ttrain_cost\t%f" % avg_cost_train)
print("kpis\ttest_cost\t%f" % test_loss)
print("kpis\ttrain_duration\t%f" % time_consumed)
if pass_id % args.save_interval == 0:
model_path = os.path.join(args.save_dir, str(pass_id))
......
# Copy the contents of ./data/pascalvoc into the dataset cache under /home.
cp -r ./data/pascalvoc/. /home/.cache/paddle/dataset/pascalvoc
# Limit MKL and OpenMP to one thread each.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${object_detection_cudaid:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
# Download the dataset only when the cache directory does not exist yet.
# NOTE(review): this check and --data_dir use /root/.cache while the cp above
# targets /home/.cache -- confirm which location is intended.
if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then
mkdir -p /root/.cache/paddle/dataset/pascalvoc
./data/pascalvoc/download.sh
bash ./.move.sh
fi
# Train for two passes with batch size 64, reading data from the cache.
FLAGS_benchmark=true python train.py --batch_size=64 --num_passes=2 --for_model_ce=True --data_dir=/root/.cache/paddle/dataset/pascalvoc/
#!/bin/bash
# This file is only used for continuous evaluation (CE): it prepares the
# Pascal VOC data, then trains once on a single card and once on several
# cards, piping each training log into _ce.py.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# One location is used for the existence check, the copy target and
# --data_dir, so the data is always copied to where training reads it.
data_dir=/root/.cache/paddle/dataset/pascalvoc
if [ ! -d "$data_dir" ]; then
    mkdir -p "$data_dir"
    ./data/pascalvoc/download.sh
    cp -r ./data/pascalvoc/. "$data_dir"
fi

# Single-card run.
cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=${data_dir}/ | python _ce.py

# Multi-card run. ":=" above has already assigned object_detection_cudaid,
# so a second variable is needed for the multi-card default to take effect.
cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --num_passes=2 --data_dir=${data_dir}/ | python _ce.py
####this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared among models in some way!
# KPI trackers for the object-detection CE job. Each tracker's name must equal
# the KPI name carried by a "kpis" log record, because log_to_ce() looks the
# tracker up by that name.
# NOTE(review): the two positional numbers are presumably a tolerance
# threshold and a count of leading records to skip -- confirm against kpi.py.

# Single-card run.
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=True)
train_speed_kpi = AccKpi('train_speed', 0.2, 0, actived=True)
# Multi-card run: same metrics, reported under a "_card4" suffix.
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True)
test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=True)
train_speed_card4_kpi = AccKpi('train_speed_card4', 0.2, 0, actived=True)
# Every tracker that log_to_ce() is allowed to update.
tracking_kpis = [
train_cost_kpi,
test_acc_kpi,
train_speed_kpi,
train_cost_card4_kpi,
test_acc_card4_kpi,
train_speed_card4_kpi,
]
def parse_log(log):
    '''
    Extract KPI records from a training log.

    A record is a line made of exactly three fields whose first field is the
    literal tag "kpis", for example (fields separated by a TAB character):

        kpis  train_cost  1.0

    TAB is the canonical separator. A line that does not split into three
    TAB-separated fields is split again on arbitrary whitespace, so records
    printed with plain spaces ("kpis train_cost 1.0") are accepted as well.
    All other lines are ignored.

    Args:
        log: the complete training log as a single string.

    Yields:
        (kpi_name, kpi_value) tuples in log order; kpi_value is a float.

    Raises:
        ValueError: if the third field of a record is not a valid float.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Echo the TAB-split fields of every log line to stdout.
        print(fs)
        if len(fs) != 3:
            # Not a TAB-separated record: retry with any-whitespace splitting
            # so space-separated "kpis name value" lines are not dropped.
            fs = line.split()
        if len(fs) == 3 and fs[0] == 'kpis':
            # Mark the lines that were recognised as KPI records.
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    """Record every KPI found in `log` on its tracker and persist it.

    Trackers are taken from the module-level `tracking_kpis` list and are
    looked up by their `name`; a KPI name without a tracker raises KeyError.
    """
    trackers = {kpi.name: kpi for kpi in tracking_kpis}

    for (name, value) in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    log = sys.stdin.read()
    # Dump the raw log between two marker lines before recording its KPIs.
    for chunk in ("*****", log, "****"):
        print(chunk)
    log_to_ce(log)
import paddle.v2 as paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
......
......@@ -23,7 +23,7 @@ add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalv
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort', bool, True, "Whether apply distort.")
add_arg('apply_expand', bool, True, "Whether appley expand.")
add_arg('apply_expand', bool, True, "Whether apply expand.")
add_arg('nms_threshold', float, 0.45, "NMS threshold.")
add_arg('ap_version', str, '11point', "integral, 11point.")
add_arg('resize_h', int, 300, "The resized image height.")
......@@ -32,10 +32,8 @@ add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will
add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('for_model_ce', bool, False, "Use CE to evaluate the model")
add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('skip_batch_num', int, 5, "the num of minibatch to skip.")
add_arg('iterations', int, 120, "mini batchs.")
add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
#yapf: enable
......@@ -48,6 +46,9 @@ def train(args,
num_passes,
model_save_dir,
pretrained_model=None):
if args.enable_ce:
fluid.framework.default_startup_program().random_seed = 111
image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset:
num_classes = 91
......@@ -121,8 +122,12 @@ def train(args,
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
if not args.enable_ce:
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
......@@ -140,32 +145,32 @@ def train(args,
def test(pass_id, best_map):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
every_pass_map=[]
for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if batch_id % 20 == 0:
every_pass_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map))
mean_map = np.mean(every_pass_map)
if test_map[0] > best_map:
best_map = test_map[0]
save_model('best_model')
print("Pass {0}, test map {1}".format(pass_id, test_map))
return best_map
return best_map, mean_map
train_num = 0
total_train_time = 0.0
total_time = 0.0
for pass_id in range(num_passes):
epoch_idx = pass_id + 1
start_time = time.time()
prev_start_time = start_time
# end_time = 0
every_pass_loss = []
iter = 0
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
if args.for_model_ce and iter == args.iterations:
break
if len(data) < (devices_num * 2):
print("There are too few data to train on all devices.")
continue
......@@ -176,34 +181,31 @@ def train(args,
loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
# end_time = time.time()
loss_v = np.mean(np.array(loss_v))
every_pass_loss.append(loss_v)
if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time))
if args.for_model_ce and iter >= args.skip_batch_num or pass_id != 0:
batch_duration = time.time() - start_time
pass_duration += batch_duration
train_num += len(data)
every_pass_loss.append(loss_v)
iter += 1
total_train_time += pass_duration
if args.for_model_ce and pass_id == num_passes - 1:
examples_per_sec = train_num / total_train_time
cost = np.mean(every_pass_loss)
with open("train_speed_factor.txt", 'w') as f:
f.write('{:f}\n'.format(examples_per_sec))
with open("train_cost_factor.txt", 'a+') as f:
f.write('{:f}\n'.format(cost))
best_map = test(pass_id, best_map)
end_time = time.time()
best_map, mean_map = test(pass_id, best_map)
if args.enable_ce and pass_id == 1:
total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx))
else:
print ("kpis train_cost_card%s %s" % (devices_num, train_avg_loss))
print ("kpis test_acc_card%s %s" % (devices_num, mean_map))
print ("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx))
if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id))
print("Best test map {0}".format(best_map))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册