Commit 0c847657 authored by guochaorong

set ce flag for language_model

Parent 3812d044
@@ -8,7 +8,7 @@ export CUDA_VISIBLE_DEVICES=$cudaid
 FLAGS_benchmark=true python train.py | python _ce.py
 
-cudaid=${language_model_m:=0,1,2,3} # use 0-th card as default
+cudaid=${language_model_m:=0,1,2,3} # use 0,1,2,3 card as default
 export CUDA_VISIBLE_DEVICES=$cudaid
 
 FLAGS_benchmark=true python train.py | python _ce.py
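The run script pipes the trainer's stdout into _ce.py, which scrapes the "kpis" lines that train.py emits on the final pass when CE is enabled (see the train.py hunks below). As a rough illustration of what such a scraper does — a hedged sketch only, not the actual _ce.py, which belongs to the continuous-evaluation framework — consider:

    import sys

    # Hypothetical sketch of a CE log scraper; the real _ce.py uses the
    # continuous-evaluation framework's KPI objects rather than a dict.
    def parse_kpis(stream):
        kpis = {}
        for line in stream:
            parts = line.strip().split()
            # lines look like: "kpis imikolov_20_avg_ppl 41.3"
            if len(parts) == 3 and parts[0] == 'kpis':
                kpis[parts[1]] = float(parts[2])
        return kpis

    if __name__ == '__main__':
        for name, value in sorted(parse_kpis(sys.stdin).items()):
            print("%s = %f" % (name, value))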
@@ -4,14 +4,25 @@ import time
 import numpy as np
 import math
+import argparse
 
 import paddle.fluid as fluid
 import paddle
 
 import utils
 
-# random seed must set before configuring the network.
-fluid.default_startup_program().random_seed = 102
+SEED = 102
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("language_model benchmark.")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help='If set, run \
+        the task with continuous evaluation logs.')
+    args = parser.parse_args()
+    return args
 
 
 def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
     """ network definition """
@@ -66,6 +77,11 @@ def train(train_reader,
           init_low_bound=-0.04,
           init_high_bound=0.04):
     """ train network """
+    args = parse_args()
+    if args.enable_ce:
+        # random seed must set before configuring the network.
+        fluid.default_startup_program().random_seed = SEED
+
     vocab_size = len(vocab)
 
     #Input data
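Seeding the startup program before the network is configured makes the random parameter initialization repeatable, which — combined with the unshuffled reader in utils.py (last hunk) — is what lets CE compare perplexity across runs. The property being relied on, illustrated here with numpy rather than fluid (the constants 102, -0.04 and 0.04 mirror SEED and the init bounds above):

    import numpy as np

    # Two generators with the same seed produce identical "random" values,
    # so two seeded training runs start from identical initial weights.
    a = np.random.RandomState(102).uniform(-0.04, 0.04, size=5)
    b = np.random.RandomState(102).uniform(-0.04, 0.04, size=5)
    assert (a == b).all()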
@@ -77,7 +93,7 @@ def train(train_reader,
     # Train program
     avg_cost = None
     cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
                    init_low_bound, init_high_bound)
     avg_cost = fluid.layers.mean(x=cost)
 
     # Optimization to minimize lost
@@ -97,7 +113,7 @@ def train(train_reader,
     train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
 
     total_time = 0.0
-    fetch_list=[avg_cost.name]
+    fetch_list = [avg_cost.name]
     for pass_idx in xrange(pass_num):
         epoch_idx = pass_idx + 1
         print "epoch_%d start" % epoch_idx
@@ -111,12 +127,11 @@ def train(train_reader,
                 map(lambda x: x[0], data), place)
             lod_dst_wordseq = utils.to_lodtensor(
                 map(lambda x: x[1], data), place)
-            ret_avg_cost = train_exe.run(
-                feed={
-                    "src_wordseq": lod_src_wordseq,
-                    "dst_wordseq": lod_dst_wordseq
-                },
-                fetch_list=fetch_list)
+            ret_avg_cost = train_exe.run(feed={
+                "src_wordseq": lod_src_wordseq,
+                "dst_wordseq": lod_dst_wordseq
+            },
+                                         fetch_list=fetch_list)
             avg_ppl = np.exp(ret_avg_cost[0])
             newest_ppl = np.mean(avg_ppl)
             if i % 100 == 0:
@@ -124,39 +139,44 @@ def train(train_reader,
 
         t1 = time.time()
         total_time += t1 - t0
-        print "epoch:%d num_steps:%d time_cost(s):%f" % (
-            epoch_idx, i, total_time / epoch_idx)
+        print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
+                                                         total_time / epoch_idx)
 
-        if pass_idx == pass_num - 1:
+        if pass_idx == pass_num - 1 and args.enable_ce:
             #Note: The following logs are special for CE monitoring.
             #Other situations do not need to care about these logs.
             gpu_num = get_cards()
             if gpu_num == 1:
-                print("kpis imikolov_20_pass_duration %s" % (total_time / epoch_idx))
+                print("kpis imikolov_20_pass_duration %s" %
+                      (total_time / epoch_idx))
                 print("kpis imikolov_20_avg_ppl %s" % newest_ppl)
             else:
                 print("kpis imikolov_20_pass_duration_card%s %s" % \
                       (gpu_num, total_time / epoch_idx))
-                print("kpis imikolov_20_avg_ppl_card%s %s" % (gpu_num, newest_ppl))
+                print("kpis imikolov_20_avg_ppl_card%s %s" %
+                      (gpu_num, newest_ppl))
 
         save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
         feed_var_names = ["src_wordseq", "dst_wordseq"]
         fetch_vars = [avg_cost]
-        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
-                                      exe)
+        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
         print("model saved in %s" % save_dir)
 
     print("finish training")
 
 
 def get_cards():
     cards = os.environ.get('CUDA_VISIBLE_DEVICES')
     num = len(cards.split(","))
     return num
 
 
 def train_net():
     """ do training """
     batch_size = 20
+    args = parse_args()
     vocab, train_reader, test_reader = utils.prepare_data(
-        batch_size=batch_size * get_cards(), buffer_size=1000, word_freq_threshold=0)
+        batch_size=batch_size * get_cards(), buffer_size=1000, \
+        word_freq_threshold=0, enable_ce = args.enable_ce)
     train(
         train_reader=train_reader,
         vocab=vocab,
...
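One caveat in get_cards above: os.environ.get('CUDA_VISIBLE_DEVICES') returns None when the variable is unset, so cards.split(",") would raise an AttributeError outside the .run_ce.sh harness, which always exports it first. A defensive variant, offered only as a sketch and not part of this commit:

    import os

    def get_cards(default=1):
        """Count visible GPUs, falling back when CUDA_VISIBLE_DEVICES is unset."""
        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
        if not cards:
            return default
        return len(cards.split(","))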
@@ -5,6 +5,7 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle
 
+
 def to_lodtensor(data, place):
     """ convert to LODtensor """
     seq_lens = [len(seq) for seq in data]
@@ -21,17 +22,28 @@ def to_lodtensor(data, place):
     return res
 
 
-def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0):
+def prepare_data(batch_size,
+                 buffer_size=1000,
+                 word_freq_threshold=0,
+                 enable_ce=False):
     """ prepare the English Pann Treebank (PTB) data """
     vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imikolov.train(
-                vocab,
-                buffer_size,
-                data_type=paddle.dataset.imikolov.DataType.SEQ),
-            buf_size=buffer_size),
-        batch_size)
+    if enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.imikolov.train(
+                vocab,
+                buffer_size,
+                data_type=paddle.dataset.imikolov.DataType.SEQ),
+            batch_size)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imikolov.train(
+                    vocab,
+                    buffer_size,
+                    data_type=paddle.dataset.imikolov.DataType.SEQ),
+                buf_size=buffer_size),
+            batch_size)
     test_reader = paddle.batch(
         paddle.dataset.imikolov.test(
             vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
...
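The only behavioural change in prepare_data is that the enable_ce path omits paddle.reader.shuffle, so under CE the training batches arrive in fixed corpus order; together with the fixed seed in train.py this makes runs directly comparable. The same branching can be written more compactly by composing the reader first — an equivalent sketch, assuming the same paddle v2-era reader APIs used above:

    import paddle

    def build_train_reader(vocab, batch_size, buffer_size, enable_ce=False):
        """Sketch: equivalent to the if/else above, with the shuffle optional."""
        reader = paddle.dataset.imikolov.train(
            vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ)
        if not enable_ce:
            # Shuffling is skipped under CE so the data order is deterministic.
            reader = paddle.reader.shuffle(reader, buf_size=buffer_size)
        return paddle.batch(reader, batch_size)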