Unverified · Commit 4c6882ab authored by Qingsheng Li, committed by GitHub

Seq2seq Continuous Evaluation (#1104)

* CE added

* Make the script runnable
Parent 26a0312a
#!/bin/bash
# This file is only used for continuous evaluation.
model_file='train.py'
python $model_file --pass_num 1 --learning_rate 0.001 --save_interval 10 --enable_ce
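For context, the CE harness is expected to capture the stdout of this command and feed it to the KPI evaluation script below. A rough sketch of that plumbing, assuming the evaluation script is saved as _ce.py (the filename is not shown on this page):

import subprocess

# Hypothetical plumbing, not part of this commit: run one CE training pass
# and pipe its captured stdout into the KPI evaluation script.
# The filename "_ce.py" is an assumption.
train = subprocess.run(
    ["python", "train.py", "--pass_num", "1", "--learning_rate", "0.001",
     "--save_interval", "10", "--enable_ce"],
    stdout=subprocess.PIPE, check=True)
subprocess.run(["python", "_ce.py"], input=train.stdout, check=True)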
# This file is only used for the continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi

# NOTE: kpi.py should be shared among models in some way!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True)

tracking_kpis = [
    train_cost_kpi,
    test_cost_kpi,
    train_duration_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.

    Each KPI line in the log is expected to hold three tab-separated fields:
    the literal "kpis", the KPI name, and the KPI value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_duration\t1.2
    "
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
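For illustration, a standalone snippet (not part of the commit) showing the tab-separated KPI log format that parse_log above filters on; the sample values are made up:

# Each CE log line has the form "kpis<TAB><name><TAB><value>".
sample_log = "kpis\ttrain_cost\t5.2310\nkpis\ttest_cost\t4.9870\nkpis\ttrain_duration\t120.50\n"
for line in sample_log.split('\n'):
    fs = line.strip().split('\t')
    if len(fs) == 3 and fs[0] == 'kpis':
        print(fs[1], float(fs[2]))  # e.g. prints: train_cost 5.231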
@@ -89,5 +89,9 @@ def parse_args():
        default=1,
        help="Save the trained model every n passes."
        "(default: %(default)d)")
    parser.add_argument(
        "--enable_ce",
        action='store_true',
        help="If set, run the task with continuous evaluation logs.")
    args = parser.parse_args()
    return args
@@ -35,6 +35,9 @@ import no_attention_model
def train():
    args = parse_args()

    if args.enable_ce:
        framework.default_startup_program().random_seed = 111

    # Training process
    if args.no_attention:
        avg_cost, feed_order = no_attention_model.seq_to_seq_net(
@@ -68,6 +71,7 @@ def train():

    optimizer.minimize(avg_cost)

    if not args.enable_ce:
        train_batch_generator = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
@@ -79,6 +83,16 @@ def train():
                paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
            batch_size=args.batch_size,
            drop_last=False)
    else:
        train_batch_generator = paddle.batch(
            paddle.dataset.wmt14.train(args.dict_size),
            batch_size=args.batch_size,
            drop_last=False)

        test_batch_generator = paddle.batch(
            paddle.dataset.wmt14.test(args.dict_size),
            batch_size=args.batch_size,
            drop_last=False)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
@@ -123,6 +137,9 @@ def train():
            avg_cost_train = np.array(fetch_outs[0])
            print('pass_id=%d, batch_id=%d, train_loss: %f' %
                  (pass_id, batch_id, avg_cost_train))
            # This is for continuous evaluation only
            if args.enable_ce and batch_id >= 100:
                break

        pass_end_time = time.time()
        test_loss = validation()
@@ -131,6 +148,12 @@ def train():
        print("pass_id=%d, test_loss: %f, words/s: %f, sec/pass: %f" %
              (pass_id, test_loss, words_per_sec, time_consumed))

        # This log is for continuous evaluation only; the fields are
        # tab-separated so that parse_log() in the CE script can match them.
        if args.enable_ce:
            print("kpis\ttrain_cost\t%f" % avg_cost_train)
            print("kpis\ttest_cost\t%f" % test_loss)
            print("kpis\ttrain_duration\t%f" % time_consumed)

        if pass_id % args.save_interval == 0:
            model_path = os.path.join(args.save_dir, str(pass_id))
            if not os.path.isdir(model_path):
......
./neural_machine_translation/rnn_search
\ No newline at end of file