Unverified · Commit 03b24f8d authored by xuezhong, committed by GitHub

Merge pull request #1425 from xuezhong/add_ce

Add ce
#!/bin/bash

DATA_PATH=./data
if [ ! -e $DATA_PATH/demo ]; then
    mkdir -p $DATA_PATH/demo
    if [ ! -e $DATA_PATH/demo.tgz ]; then
        cd $DATA_PATH
        wget -c --no-check-certificate http://dureader.gz.bcebos.com/demo.tgz
        cd -
    fi
    tar -zxf $DATA_PATH/demo.tgz -C $DATA_PATH/demo
fi

train() {
    python -u run.py \
        --trainset 'data/demo/search.train.json' \
        --devset 'data/demo/search.dev.json' \
        --testset 'data/demo/search.test.json' \
        --vocab_dir 'data/demo/' \
        --use_gpu true \
        --save_dir ./models \
        --pass_num 1 \
        --learning_rate 0.001 \
        --batch_size 32 \
        --embed_size 300 \
        --hidden_size 150 \
        --max_p_num 5 \
        --max_p_len 500 \
        --max_q_len 60 \
        --max_a_len 200 \
        --drop_rate 0.2 \
        --log_interval 1 \
        --enable_ce \
        --train
}

cudaid=${single:=0}  # use card 0 by default
export CUDA_VISIBLE_DEVICES=$cudaid
train | python _ce.py

cudaid=${multi:=0,1,2,3}  # use cards 0,1,2,3 by default
export CUDA_VISIBLE_DEVICES=$cudaid
train | python _ce.py
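The train function's stdout is piped into _ce.py (shown next), which scans it for tab-separated KPI records. With --enable_ce set, run.py emits lines of the following shape; the numeric values below are hypothetical, and the card suffix reflects the number of visible devices:

kpis	train_cost_card1	4.783215
kpis	test_cost_card1	5.104332
kpis	train_duration_card1	318.402212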
#### This file is only used for the continuous evaluation (CE) test!
import os
import sys
#sys.path.insert(0, os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!

train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
    'train_duration_card1', 0.06, 0, actived=True)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.01, 0, actived=True)
test_cost_card4_kpi = CostKpi('test_cost_card4', 0.005, 0, actived=True)
train_duration_card4_kpi = DurationKpi(
    'train_duration_card4', 0.06, 0, actived=True)

tracking_kpis = [
    train_cost_card1_kpi,
    test_cost_card1_kpi,
    train_duration_card1_kpi,
    train_cost_card4_kpi,
    test_cost_card4_kpi,
    train_duration_card4_kpi,
]


def parse_log(log):
    '''
    This method should be implemented by model developers.

    Each KPI line in the log is expected to carry three tab-separated
    fields: the literal prefix "kpis", the KPI name, and its value, e.g.:

        kpis\ttrain_cost_card1\t1.0
        kpis\ttest_cost_card1\t1.0
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value


def log_to_ce(log):
    kpi_tracker = {}
    for kpi in tracking_kpis:
        kpi_tracker[kpi.name] = kpi

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        kpi_tracker[kpi_name].add_record(kpi_value)
        kpi_tracker[kpi_name].persist()


if __name__ == '__main__':
    log = sys.stdin.read()
    print("*****")
    print(log)
    print("****")
    log_to_ce(log)
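A minimal usage sketch of parse_log with a hypothetical captured log (log_to_ce additionally needs the kpi module to persist records):

sample_log = "kpis\ttrain_cost_card1\t4.783215\nkpis\ttrain_duration_card1\t318.402212"
for kpi_name, kpi_value in parse_log(sample_log):
    print(kpi_name, kpi_value)
# The loop receives ('train_cost_card1', 4.783215) and then
# ('train_duration_card1', 318.402212), interleaved with parse_log's own debug prints.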
@@ -120,5 +120,9 @@ def parse_args():
         '--result_name',
         default='test_result',
         help='the file name of the results')
+    parser.add_argument(
+        "--enable_ce",
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
     args = parser.parse_args()
     return args
@@ -21,6 +21,7 @@ if [[ -d preprocessed ]] && [[ -d raw ]]; then
     exit 0
 else
     wget -c --no-check-certificate http://dureader.gz.bcebos.com/dureader_preprocessed.zip
+    wget -c --no-check-certificate http://dureader.gz.bcebos.com/demo.tgz
 fi

 if md5sum --status -c md5sum.txt; then
......
@@ -152,7 +152,7 @@ class BRCDataset(object):
                 batch_data['passage_token_ids'].append(passage_token_ids)
                 batch_data['passage_length'].append(
                     min(len(passage_token_ids), self.max_p_len))
-            # record the start passage index of current doc
+            # record the start passage index of current sample
            passade_idx_offset = sum(batch_data['passage_num'])
            batch_data['passage_num'].append(count)
            gold_passage_offset = 0
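A side note on the offset line above (hypothetical values): batch_data['passage_num'] holds the passage counts of the samples already placed in the batch, so its sum is the flattened index at which the current sample's passages begin.

batch_data = {'passage_num': [5, 3]}                 # two samples batched so far
passade_idx_offset = sum(batch_data['passage_num'])  # -> 8, where this sample's passages start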
......
@@ -248,18 +248,18 @@ def validation(inference_program, avg_cost, s_probs, e_probs, match, feed_order,
                     n_batch_loss / n_batch_cnt)))
                 n_batch_loss = 0.0
                 n_batch_cnt = 0
+        batch_offset = 0
         for idx, batch in enumerate(batch_list):
             # one batch
             batch_size = len(batch['raw_data'])
-            batch_range = match_lod[0][idx * batch_size:(idx + 1) * batch_size +
-                                       1]
+            batch_range = match_lod[0][batch_offset:batch_offset + batch_size +
+                                       1]
             batch_lod = [[batch_range[x], batch_range[x + 1]]
                          for x in range(len(batch_range[:-1]))]
-            start_prob_batch = start_probs_m[idx * batch_size:(idx + 1) *
-                                             batch_size]
-            end_prob_batch = end_probs_m[idx * batch_size:(idx + 1) *
-                                         batch_size]
+            start_prob_batch = start_probs_m[batch_offset:batch_offset +
+                                             batch_size + 1]
+            end_prob_batch = end_probs_m[batch_offset:batch_offset + batch_size
+                                         + 1]
             for sample, start_prob_inst, end_prob_inst, inst_range in zip(
                     batch['raw_data'], start_prob_batch, end_prob_batch,
                     batch_lod):
@@ -284,6 +284,7 @@ def validation(inference_program, avg_cost, s_probs, e_probs, match, feed_order,
                     'yesno_answers': []
                 }
                 ref_answers.append(ref)
+            batch_offset = batch_offset + batch_size

     result_dir = args.result_dir
     result_prefix = args.result_name
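The running batch_offset introduced above replaces idx * batch_size slicing, which mis-addresses results as soon as any batch is short (typically the last, partially filled one). A minimal sketch with hypothetical batch sizes:

# Hypothetical per-batch sample counts; only the last batch is short.
batch_sizes = [32, 32, 7]
batch_offset = 0
for idx, bs in enumerate(batch_sizes):
    old_start = idx * bs      # removed code computes 0, 32, 14 -- wrong for the last batch
    new_start = batch_offset  # running offset gives 0, 32, 64 -- always correct
    batch_offset += bs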
@@ -346,8 +347,9 @@ def train(logger, args):
     # build model
     main_program = fluid.Program()
     startup_prog = fluid.Program()
-    main_program.random_seed = args.random_seed
-    startup_prog.random_seed = args.random_seed
+    if args.enable_ce:
+        main_program.random_seed = args.random_seed
+        startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
             avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
@@ -405,7 +407,10 @@ def train(logger, args):
         for pass_id in range(1, args.pass_num + 1):
             pass_start_time = time.time()
             pad_id = vocab.get_id(vocab.pad_token)
-            train_reader = lambda:brc_data.gen_mini_batches('train', args.batch_size, pad_id, shuffle=False)
+            if args.enable_ce:
+                train_reader = lambda:brc_data.gen_mini_batches('train', args.batch_size, pad_id, shuffle=False)
+            else:
+                train_reader = lambda:brc_data.gen_mini_batches('train', args.batch_size, pad_id, shuffle=True)
             train_reader = read_multiple(train_reader, dev_count)
             log_every_n_batch, n_batch_loss = args.log_interval, 0
             total_num, total_loss = 0, 0
@@ -420,6 +425,8 @@ def train(logger, args):
                     n_batch_loss += cost_train
                     total_loss += cost_train * args.batch_size * dev_count

+                    if args.enable_ce and batch_id >= 100:
+                        break
                     if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
                         print_para(main_program, parallel_executor, logger,
                                    args)
@@ -464,6 +471,14 @@ def train(logger, args):
                 executor=exe,
                 dirname=model_path,
                 main_program=main_program)
+            if args.enable_ce:  # For CE
+                print("kpis\ttrain_cost_card%d\t%f" %
+                      (dev_count, total_loss / total_num))
+                if brc_data.dev_set is not None:
+                    print("kpis\ttest_cost_card%d\t%f" %
+                          (dev_count, eval_loss))
+                print("kpis\ttrain_duration_card%d\t%f" %
+                      (dev_count, time_consumed))

 def evaluate(logger, args):
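Note that the card suffix in these KPI names comes from dev_count: the four-GPU run (CUDA_VISIBLE_DEVICES=0,1,2,3) emits train_cost_card4, test_cost_card4, and train_duration_card4, matching the *_card4 KPIs tracked in _ce.py, while the single-card run emits the *_card1 counterparts.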
@@ -481,8 +496,6 @@ def evaluate(logger, args):
     # build model
     main_program = fluid.Program()
     startup_prog = fluid.Program()
-    main_program.random_seed = args.random_seed
-    startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
             avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
@@ -530,8 +543,6 @@ def predict(logger, args):
     # build model
     main_program = fluid.Program()
     startup_prog = fluid.Program()
-    main_program.random_seed = args.random_seed
-    startup_prog.random_seed = args.random_seed
     with fluid.program_guard(main_program, startup_prog):
         with fluid.unique_name.guard():
             avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
@@ -599,8 +610,9 @@ def prepare(logger, args):
 if __name__ == '__main__':
     args = parse_args()
-    random.seed(args.random_seed)
-    np.random.seed(args.random_seed)
+    if args.enable_ce:
+        random.seed(args.random_seed)
+        np.random.seed(args.random_seed)

     logger = logging.getLogger("brc")
     logger.setLevel(logging.INFO)
......