Commit fd28d165 authored by u010070587, committed by Yibing Liu

add understand_sentiment ce (#731)

Parent e2f16f37
#!/bin/bash
#This file is only used for continuous evaluation.
export FLAGS_cudnn_deterministic=true
export CUDA_VISIBLE_DEVICES=0
python train_conv.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
python train_dyn_rnn.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
python train_stacked_lstm.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
### This file is only used for continuous evaluation test!
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import AccKpi

conv_train_cost_kpi = CostKpi(
    'conv_train_cost', 0.02, 0, actived=True, desc='train cost')
conv_train_acc_kpi = AccKpi(
    'conv_train_acc', 0.02, 0, actived=True, desc='train acc')
conv_test_cost_kpi = CostKpi(
    'conv_test_cost', 0.02, 0, actived=True, desc='test cost')
conv_test_acc_kpi = AccKpi(
    'conv_test_acc', 0.02, 0, actived=True, desc='test acc')
rnn_train_cost_kpi = CostKpi(
    'rnn_train_cost', 0.02, 0, actived=True, desc='train cost')
rnn_train_acc_kpi = AccKpi(
    'rnn_train_acc', 0.02, 0, actived=True, desc='train acc')
rnn_test_cost_kpi = CostKpi(
    'rnn_test_cost', 0.02, 0, actived=True, desc='test cost')
rnn_test_acc_kpi = AccKpi(
    'rnn_test_acc', 0.02, 0, actived=True, desc='test acc')
lstm_train_cost_kpi = CostKpi(
    'lstm_train_cost', 0.02, 0, actived=True, desc='train cost')
lstm_train_acc_kpi = AccKpi(
    'lstm_train_acc', 0.02, 0, actived=True, desc='train acc')
lstm_test_cost_kpi = CostKpi(
    'lstm_test_cost', 0.02, 0, actived=True, desc='test cost')
lstm_test_acc_kpi = AccKpi(
    'lstm_test_acc', 0.02, 0, actived=True, desc='test acc')

tracking_kpis = [
    conv_train_cost_kpi, conv_train_acc_kpi, conv_test_cost_kpi,
    conv_test_acc_kpi, rnn_train_cost_kpi, rnn_train_acc_kpi, rnn_test_cost_kpi,
    rnn_test_acc_kpi, lstm_train_cost_kpi, lstm_train_acc_kpi,
    lstm_test_cost_kpi, lstm_test_acc_kpi
]


def parse_log(log):
    # Extract (kpi_name, kpi_value) pairs from lines of the form
    # "kpis\t<name>\t<value>" emitted by the training scripts.
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    # Record each parsed KPI value against its tracker and persist it.
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
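For reference, a minimal sketch of the log format this script consumes: each training script prints tab-separated lines of the form "kpis\t<kpi_name>\t<value>" to stdout, and the shell script above pipes that output into _ce.py. The snippet below mirrors the parsing done by parse_log(); the sample values are made-up placeholders, not real results.

# Illustrative sketch only: mirrors parse_log() above on a fake log string.
sample_log = ("Step 0, Epoch 0 Metrics [...]\n"
              "kpis\tconv_train_cost\t0.500000\n"
              "kpis\tconv_train_acc\t0.750000\n")
for line in sample_log.split('\n'):
    fs = line.strip().split('\t')
    if len(fs) == 3 and fs[0] == 'kpis':
        # prints: conv_train_cost 0.5, then conv_train_acc 0.75
        print(fs[1], float(fs[2]))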
train_conv.py
@@ -19,6 +19,7 @@ import paddle.fluid as fluid
 import numpy as np
 import sys
 import math
+import argparse
 
 CLASS_DIM = 2
 EMB_DIM = 128
@@ -26,6 +27,20 @@ HID_DIM = 512
 BATCH_SIZE = 128
 
 
+def parse_args():
+    parser = argparse.ArgumentParser("conv")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
+    parser.add_argument(
+        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
+    parser.add_argument(
+        '--num_epochs', type=int, default=1, help="number of epochs.")
+    args = parser.parse_args()
+    return args
+
+
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
     emb = fluid.layers.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
@@ -74,20 +89,29 @@ def train(use_cuda, params_dirname):
     word_dict = paddle.dataset.imdb.word_dict()
 
     print("Reading training data....")
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE)
+    if args.enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imdb.train(word_dict), buf_size=25000),
+            batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
-    pass_num = 1
+    pass_num = args.num_epochs
 
     main_program = fluid.default_main_program()
     star_program = fluid.default_startup_program()
 
+    if args.enable_ce:
+        main_program.random_seed = 90
+        star_program.random_seed = 90
+
     prediction = inference_program(word_dict)
     train_func_outputs = train_program(prediction)
     avg_cost = train_func_outputs[0]
@@ -147,6 +171,11 @@ def train(use_cuda, params_dirname):
                 if params_dirname is not None:
                     fluid.io.save_inference_model(params_dirname, ["words"],
                                                   prediction, exe)
+            if args.enable_ce and epoch_id == pass_num - 1:
+                print("kpis\tconv_train_cost\t%f" % metrics[0])
+                print("kpis\tconv_train_acc\t%f" % metrics[1])
+                print("kpis\tconv_test_cost\t%f" % avg_cost_test)
+                print("kpis\tconv_test_acc\t%f" % acc_test)
 
     train_loop()
@@ -211,5 +240,6 @@ def main(use_cuda):
 
 if __name__ == '__main__':
-    use_cuda = False  # set to True if training with GPU
+    args = parse_args()
+    use_cuda = args.use_gpu  # set to True if training with GPU
     main(use_cuda)
train_dyn_rnn.py
@@ -19,6 +19,7 @@ import paddle.fluid as fluid
 import numpy as np
 import sys
 import math
+import argparse
 
 CLASS_DIM = 2
 EMB_DIM = 128
@@ -26,6 +27,20 @@ BATCH_SIZE = 128
 LSTM_SIZE = 128
 
 
+def parse_args():
+    parser = argparse.ArgumentParser("dyn_rnn")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
+    parser.add_argument(
+        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
+    parser.add_argument(
+        '--num_epochs', type=int, default=1, help="number of epochs.")
+    args = parser.parse_args()
+    return args
+
+
 def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
     emb = fluid.layers.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
@@ -89,20 +104,29 @@ def train(use_cuda, params_dirname):
     word_dict = paddle.dataset.imdb.word_dict()
 
     print("Reading training data....")
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE)
+    if args.enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imdb.train(word_dict), buf_size=25000),
+            batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
-    pass_num = 1
+    pass_num = args.num_epochs
 
     main_program = fluid.default_main_program()
     star_program = fluid.default_startup_program()
 
+    if args.enable_ce:
+        main_program.random_seed = 90
+        star_program.random_seed = 90
+
    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]
@@ -148,9 +172,10 @@ def train(use_cuda, params_dirname):
                     fetch_list=[var.name for var in train_func_outputs])
                 if (step_id + 1) % 10 == 0:
-                    #avg_cost_test, acc_test = train_test(test_program, test_reader)
-                    #print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
-                    #    step_id, avg_cost_test, acc_test))
+                    avg_cost_test, acc_test = train_test(test_program,
+                                                         test_reader)
+                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
+                        step_id, avg_cost_test, acc_test))
 
                     print("Step {0}, Epoch {1} Metrics {2}".format(
                         step_id, epoch_id, list(map(np.array, metrics))))
@@ -159,6 +184,11 @@ def train(use_cuda, params_dirname):
                 if params_dirname is not None:
                     fluid.io.save_inference_model(params_dirname, ["words"],
                                                   prediction, exe)
+            if args.enable_ce and epoch_id == pass_num - 1:
+                print("kpis\trnn_train_cost\t%f" % metrics[0])
+                print("kpis\trnn_train_acc\t%f" % metrics[1])
+                print("kpis\trnn_test_cost\t%f" % avg_cost_test)
+                print("kpis\trnn_test_acc\t%f" % acc_test)
 
     train_loop()
@@ -223,5 +253,6 @@ def main(use_cuda):
 
 if __name__ == '__main__':
-    use_cuda = False  # set to True if training with GPU
+    args = parse_args()
+    use_cuda = args.use_gpu  # set to True if training with GPU
     main(use_cuda)
train_stacked_lstm.py
@@ -20,13 +20,27 @@ import paddle.fluid as fluid
 import numpy as np
 import sys
 import math
+import argparse
 
 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
 STACKED_NUM = 3
 BATCH_SIZE = 128
-USE_GPU = False
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("stacked_lstm")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
+    parser.add_argument(
+        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
+    parser.add_argument(
+        '--num_epochs', type=int, default=1, help="number of epochs.")
+    args = parser.parse_args()
+    return args
 
 
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
@@ -84,20 +98,30 @@ def train(use_cuda, params_dirname):
     word_dict = paddle.dataset.imdb.word_dict()
 
     print("Reading training data....")
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=25000),
-        batch_size=BATCH_SIZE)
+
+    if args.enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.imdb.train(word_dict), buf_size=25000),
+            batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
     test_reader = paddle.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
-    pass_num = 1
+    pass_num = args.num_epochs
 
     main_program = fluid.default_main_program()
     star_program = fluid.default_startup_program()
 
+    if args.enable_ce:
+        main_program.random_seed = 90
+        star_program.random_seed = 90
+
     prediction = inference_program(word_dict)
     train_func_outputs = train_program(prediction)
     avg_cost = train_func_outputs[0]
@@ -134,7 +158,7 @@ def train(use_cuda, params_dirname):
         main_program.global_block().var(var_name) for var_name in feed_order
     ]
     feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
-    exe.run(fluid.default_startup_program())
+    exe.run(star_program)
 
     for epoch_id in range(pass_num):
         for step_id, data in enumerate(train_reader()):
@@ -157,6 +181,11 @@ def train(use_cuda, params_dirname):
                 if params_dirname is not None:
                     fluid.io.save_inference_model(params_dirname, ["words"],
                                                   prediction, exe)
+            if args.enable_ce and epoch_id == pass_num - 1:
+                print("kpis\tlstm_train_cost\t%f" % metrics[0])
+                print("kpis\tlstm_train_acc\t%f" % metrics[1])
+                print("kpis\tlstm_test_cost\t%f" % avg_cost_test)
+                print("kpis\tlstm_test_acc\t%f" % acc_test)
 
     train_loop()
@@ -221,5 +250,6 @@ def main(use_cuda):
 
 if __name__ == '__main__':
-    use_cuda = False  # set to True if training with GPU
+    args = parse_args()
+    use_cuda = args.use_gpu  # set to True if training with GPU
     main(use_cuda)
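A note on the --use_gpu flag added in all three scripts: it is parsed as an int, so "--use_gpu 1" produces a truthy value that is passed straight through as use_cuda. The device-selection line inside train() is not part of these hunks, so the helper below is only a sketch of the usual pattern in these examples, with a hypothetical name.

# Assumption: the training scripts pick the device roughly like this; the
# exact line is outside the hunks shown in this change.
import paddle.fluid as fluid

def pick_place(use_cuda):
    # use_cuda is the int parsed from --use_gpu: any nonzero value selects
    # GPU 0 (CUDA_VISIBLE_DEVICES=0 in the CE script), 0 falls back to CPU.
    return fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()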