Commit fd28d165, authored by u010070587, committed by Yibing Liu

add understand_sentiment ce (#731)

Parent e2f16f37
#!/bin/bash
#This file is only used for continuous evaluation.
export FLAGS_cudnn_deterministic=true
export CUDA_VISIBLE_DEVICES=0
python train_conv.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
python train_dyn_rnn.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
python train_stacked_lstm.py --use_gpu 1 --num_epochs=1 --enable_ce | python _ce.py
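# Note (not part of the original script, added for illustration): each training
# command above prints lines of the form "kpis<TAB><kpi_name><TAB><value>" when
# --enable_ce is set; _ce.py reads that stream from stdin and persists every
# tracked KPI. A minimal CPU-only sanity check could look like:
#   python train_conv.py --use_gpu 0 --num_epochs=1 --enable_ce | python _ce.py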
### This file is only used for continuous evaluation test!
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import AccKpi
conv_train_cost_kpi = CostKpi(
    'conv_train_cost', 0.02, 0, actived=True, desc='train cost')
conv_train_acc_kpi = AccKpi(
    'conv_train_acc', 0.02, 0, actived=True, desc='train acc')
conv_test_cost_kpi = CostKpi(
    'conv_test_cost', 0.02, 0, actived=True, desc='test cost')
conv_test_acc_kpi = AccKpi(
    'conv_test_acc', 0.02, 0, actived=True, desc='test acc')
rnn_train_cost_kpi = CostKpi(
    'rnn_train_cost', 0.02, 0, actived=True, desc='train cost')
rnn_train_acc_kpi = AccKpi(
    'rnn_train_acc', 0.02, 0, actived=True, desc='train acc')
rnn_test_cost_kpi = CostKpi(
    'rnn_test_cost', 0.02, 0, actived=True, desc='test cost')
rnn_test_acc_kpi = AccKpi(
    'rnn_test_acc', 0.02, 0, actived=True, desc='test acc')
lstm_train_cost_kpi = CostKpi(
    'lstm_train_cost', 0.02, 0, actived=True, desc='train cost')
lstm_train_acc_kpi = AccKpi(
    'lstm_train_acc', 0.02, 0, actived=True, desc='train acc')
lstm_test_cost_kpi = CostKpi(
    'lstm_test_cost', 0.02, 0, actived=True, desc='test cost')
lstm_test_acc_kpi = AccKpi(
    'lstm_test_acc', 0.02, 0, actived=True, desc='test acc')

tracking_kpis = [
    conv_train_cost_kpi, conv_train_acc_kpi, conv_test_cost_kpi,
    conv_test_acc_kpi, rnn_train_cost_kpi, rnn_train_acc_kpi, rnn_test_cost_kpi,
    rnn_test_acc_kpi, lstm_train_cost_kpi, lstm_train_acc_kpi,
    lstm_test_cost_kpi, lstm_test_acc_kpi
]


def parse_log(log):
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    log_to_ce(log)
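# Illustrative sketch only (not part of _ce.py): parse_log keeps just the
# tab-separated "kpis<TAB><name><TAB><value>" lines that the training scripts
# print under --enable_ce and ignores every other output line. With a made-up
# log such as:
#
#   sample_output = ("Step 10, Epoch 0 Metrics ...\n"
#                    "kpis\tconv_train_cost\t0.693147\n"
#                    "kpis\tconv_train_acc\t0.507812\n")
#   for name, value in parse_log(sample_output):
#       print(name, value)
#
# the loop would yield ('conv_train_cost', 0.693147) and
# ('conv_train_acc', 0.507812), which log_to_ce then records and persists.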
@@ -19,6 +19,7 @@ import paddle.fluid as fluid
import numpy as np
import sys
import math
import argparse
CLASS_DIM = 2
EMB_DIM = 128
@@ -26,6 +27,20 @@ HID_DIM = 512
BATCH_SIZE = 128


def parse_args():
    parser = argparse.ArgumentParser("conv")
    parser.add_argument(
        '--enable_ce',
        action='store_true',
        help="If set, run the task with continuous evaluation logs.")
    parser.add_argument(
        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
    parser.add_argument(
        '--num_epochs', type=int, default=1, help="number of epochs.")
    args = parser.parse_args()
    return args


def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
    emb = fluid.layers.embedding(
        input=data, size=[input_dim, emb_dim], is_sparse=True)
@@ -74,6 +89,10 @@ def train(use_cuda, params_dirname):
    word_dict = paddle.dataset.imdb.word_dict()

    print("Reading training data....")
    if args.enable_ce:
        train_reader = paddle.batch(
            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.imdb.train(word_dict), buf_size=25000),
@@ -84,10 +103,15 @@ def train(use_cuda, params_dirname):
        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    feed_order = ['words', 'label']
    pass_num = 1
    pass_num = args.num_epochs
    main_program = fluid.default_main_program()
    star_program = fluid.default_startup_program()

    if args.enable_ce:
        main_program.random_seed = 90
        star_program.random_seed = 90

    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]
@@ -147,6 +171,11 @@ def train(use_cuda, params_dirname):
            if params_dirname is not None:
                fluid.io.save_inference_model(params_dirname, ["words"],
                                              prediction, exe)
            if args.enable_ce and epoch_id == pass_num - 1:
                print("kpis\tconv_train_cost\t%f" % metrics[0])
                print("kpis\tconv_train_acc\t%f" % metrics[1])
                print("kpis\tconv_test_cost\t%f" % avg_cost_test)
                print("kpis\tconv_test_acc\t%f" % acc_test)

    train_loop()
@@ -211,5 +240,6 @@ def main(use_cuda):
if __name__ == '__main__':
    use_cuda = False # set to True if training with GPU
    args = parse_args()
    use_cuda = args.use_gpu # set to True if training with GPU
    main(use_cuda)
@@ -19,6 +19,7 @@ import paddle.fluid as fluid
import numpy as np
import sys
import math
import argparse
CLASS_DIM = 2
EMB_DIM = 128
@@ -26,6 +27,20 @@ BATCH_SIZE = 128
LSTM_SIZE = 128


def parse_args():
    parser = argparse.ArgumentParser("dyn_rnn")
    parser.add_argument(
        '--enable_ce',
        action='store_true',
        help="If set, run the task with continuous evaluation logs.")
    parser.add_argument(
        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
    parser.add_argument(
        '--num_epochs', type=int, default=1, help="number of epochs.")
    args = parser.parse_args()
    return args


def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
    emb = fluid.layers.embedding(
        input=data, size=[input_dim, emb_dim], is_sparse=True)
@@ -89,6 +104,10 @@ def train(use_cuda, params_dirname):
    word_dict = paddle.dataset.imdb.word_dict()

    print("Reading training data....")
    if args.enable_ce:
        train_reader = paddle.batch(
            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.imdb.train(word_dict), buf_size=25000),
@@ -99,10 +118,15 @@ def train(use_cuda, params_dirname):
        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    feed_order = ['words', 'label']
    pass_num = 1
    pass_num = args.num_epochs
    main_program = fluid.default_main_program()
    star_program = fluid.default_startup_program()

    if args.enable_ce:
        main_program.random_seed = 90
        star_program.random_seed = 90

    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]
@@ -148,9 +172,10 @@ def train(use_cuda, params_dirname):
                    fetch_list=[var.name for var in train_func_outputs])
                if (step_id + 1) % 10 == 0:
                    #avg_cost_test, acc_test = train_test(test_program, test_reader)
                    #print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
                    #    step_id, avg_cost_test, acc_test))
                    avg_cost_test, acc_test = train_test(test_program,
                                                         test_reader)
                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
                        step_id, avg_cost_test, acc_test))

                    print("Step {0}, Epoch {1} Metrics {2}".format(
                        step_id, epoch_id, list(map(np.array, metrics))))
@@ -159,6 +184,11 @@ def train(use_cuda, params_dirname):
            if params_dirname is not None:
                fluid.io.save_inference_model(params_dirname, ["words"],
                                              prediction, exe)
            if args.enable_ce and epoch_id == pass_num - 1:
                print("kpis\trnn_train_cost\t%f" % metrics[0])
                print("kpis\trnn_train_acc\t%f" % metrics[1])
                print("kpis\trnn_test_cost\t%f" % avg_cost_test)
                print("kpis\trnn_test_acc\t%f" % acc_test)

    train_loop()
@@ -223,5 +253,6 @@ def main(use_cuda):
if __name__ == '__main__':
    use_cuda = False # set to True if training with GPU
    args = parse_args()
    use_cuda = args.use_gpu # set to True if training with GPU
    main(use_cuda)
@@ -20,13 +20,27 @@ import paddle.fluid as fluid
import numpy as np
import sys
import math
import argparse
CLASS_DIM = 2
EMB_DIM = 128
HID_DIM = 512
STACKED_NUM = 3
BATCH_SIZE = 128
USE_GPU = False
def parse_args():
    parser = argparse.ArgumentParser("stacked_lstm")
    parser.add_argument(
        '--enable_ce',
        action='store_true',
        help="If set, run the task with continuous evaluation logs.")
    parser.add_argument(
        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
    parser.add_argument(
        '--num_epochs', type=int, default=1, help="number of epochs.")
    args = parser.parse_args()
    return args


def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
@@ -84,6 +98,11 @@ def train(use_cuda, params_dirname):
    word_dict = paddle.dataset.imdb.word_dict()

    print("Reading training data....")
    if args.enable_ce:
        train_reader = paddle.batch(
            paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.imdb.train(word_dict), buf_size=25000),
@@ -94,10 +113,15 @@ def train(use_cuda, params_dirname):
        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    feed_order = ['words', 'label']
    pass_num = 1
    pass_num = args.num_epochs
    main_program = fluid.default_main_program()
    star_program = fluid.default_startup_program()

    if args.enable_ce:
        main_program.random_seed = 90
        star_program.random_seed = 90

    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]
@@ -134,7 +158,7 @@ def train(use_cuda, params_dirname):
            main_program.global_block().var(var_name) for var_name in feed_order
        ]
        feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place)
        exe.run(fluid.default_startup_program())
        exe.run(star_program)

        for epoch_id in range(pass_num):
            for step_id, data in enumerate(train_reader()):
@@ -157,6 +181,11 @@ def train(use_cuda, params_dirname):
            if params_dirname is not None:
                fluid.io.save_inference_model(params_dirname, ["words"],
                                              prediction, exe)
            if args.enable_ce and epoch_id == pass_num - 1:
                print("kpis\tlstm_train_cost\t%f" % metrics[0])
                print("kpis\tlstm_train_acc\t%f" % metrics[1])
                print("kpis\tlstm_test_cost\t%f" % avg_cost_test)
                print("kpis\tlstm_test_acc\t%f" % acc_test)

    train_loop()
@@ -221,5 +250,6 @@ def main(use_cuda):
if __name__ == '__main__':
    use_cuda = False # set to True if training with GPU
    args = parse_args()
    use_cuda = args.use_gpu # set to True if training with GPU
    main(use_cuda)