提交 c78016e7 编写于 作者: Z zhengya01 提交者: Bruce

Ce lexical analysis (#2359)

* add ce for lexical_analysis

* add ce for lexical_analysis

* add ce for lexical_analysis

* add ce for lexical_analysis
上级 6fd36438
#!/bin/bash
# Runtime flags exported to every process started below (including the
# training runs launched by train()).
# NOTE(review): judging by their names these are PaddlePaddle runtime flags
# controlling the GPU memory fraction and eager tensor deletion; confirm the
# exact semantics against the PaddlePaddle FLAGS documentation.
export FLAGS_fraction_of_gpu_memory_to_use=0.5
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
# Launch one run of run_sequence_labeling.py with training and testing
# enabled, inference disabled, and --enable_ce set so the script prints the
# "kpis" lines consumed by _ce.py. Output goes to stdout for the caller to
# pipe.
train() {
    local -a train_args=(
        --do_train True
        --do_test True
        --do_infer False
        --train_data ./data/train.tsv
        --test_data ./data/test.tsv
        --model_save_dir ./models
        --valid_model_per_batches 1000
        --save_model_per_batches 10000
        --batch_size 100
        --epoch 2
        --use_cuda True
        --traindata_shuffle_buffer 200000
        --word_emb_dim 768
        --grnn_hidden_dim 768
        --bigru_num 2
        --base_learning_rate 1e-3
        --emb_learning_rate 5
        --crf_learning_rate 0.2
        --word_dict_path ./conf/word.dic
        --label_dict_path ./conf/tag.dic
        --word_rep_dict_path ./conf/q2b.dic
        --enable_ce
    )
    python run_sequence_labeling.py "${train_args[@]}"
}
# Run the CE job twice: first on a single card, then on four cards. Each
# run's log is piped into _ce.py, which records the reported KPIs.
card_sets=(0 0,1,2,3)
for idx in "${!card_sets[@]}"; do
    # Pause between consecutive runs (not before the first one).
    # NOTE(review): presumably gives the previous run time to release the
    # GPUs; confirm.
    if (( idx > 0 )); then
        sleep 20
    fi
    export CUDA_VISIBLE_DEVICES="${card_sets[idx]}"
    train | python _ce.py
done
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi
# KPI trackers for the two CE runs. The "_card1" / "_card4" suffixes match the
# names printed by the training script as "kpis\t<name>_card<N>\t<value>",
# where N is the number of entries in CUDA_VISIBLE_DEVICES.
# NOTE(review): the meaning of the second and third positional arguments
# (0.01 / 0.03 / 0.05 and 0) is defined by the external `kpi` module and is
# not visible here; confirm before changing them.
# NOTE(review): precision, recall and F1 are tracked with CostKpi although
# AccKpi is imported above and unused in this file; confirm this is intended.

# Single-card run.
each_step_duration_card1 = DurationKpi('each_step_duration_card1', 0.01, 0, actived=True)
# NOTE(review): this is the only tracker created with actived=False.
train_cost_card1 = CostKpi('train_cost_card1', 0.03, 0, actived=False)
train_precision_card1 = CostKpi('train_precision_card1', 0.03, 0, actived=True)
train_recall_card1 = CostKpi('train_recall_card1', 0.03, 0, actived=True)
train_f1_card1 = CostKpi('train_f1_card1', 0.03, 0, actived=True)
# Four-card run.
each_step_duration_card4 = DurationKpi('each_step_duration_card4', 0.01, 0, actived=True)
train_cost_card4 = CostKpi('train_cost_card4', 0.03, 0, actived=True)
train_precision_card4 = CostKpi('train_precision_card4', 0.05, 0, actived=True)
train_recall_card4 = CostKpi('train_recall_card4', 0.03, 0, actived=True)
train_f1_card4 = CostKpi('train_f1_card4', 0.05, 0, actived=True)
# Every tracker that log_to_ce() may look up by name; a KPI name found in the
# log but missing from this list raises KeyError there.
tracking_kpis = [
each_step_duration_card1,
train_cost_card1,
train_precision_card1,
train_recall_card1,
train_f1_card1,
each_step_duration_card4,
train_cost_card4,
train_precision_card4,
train_recall_card4,
train_f1_card4,
]
def parse_log(log):
    """Yield ``(kpi_name, kpi_value)`` pairs extracted from a training log.

    The log is split on newlines. A line is reported only when, after
    stripping surrounding whitespace, it consists of exactly three
    tab-separated fields and the first field is ``kpis``, e.g.
    ``kpis<TAB>train_cost_card1<TAB>1.0``. The third field is converted
    with ``float`` (a non-numeric value raises ``ValueError``).

    The field list of every line, matching or not, is printed to stdout.
    """
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Anything that is not a well-formed "kpis" record is ignored.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, name, value = fields
        yield name, float(value)
def log_to_ce(log):
    """Record every KPI value found in *log* on its tracker and persist it.

    Trackers are looked up by name among ``tracking_kpis``; a KPI name that
    has no tracker raises ``KeyError``. Each name/value pair is also printed
    to stdout.
    """
    trackers = {kpi.name: kpi for kpi in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The training log is piped in on stdin (see the launcher script).
    log_to_ce(sys.stdin.read())
......@@ -66,6 +66,7 @@ train_g.add_arg("emb_learning_rate", float, 5,
train_g.add_arg("crf_learning_rate", float, 0.2,
"The real learning rate of the embedding layer will be (crf_learning_rate * base_learning_rate).")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
args = parser.parse_args()
# yapf: enable.
......@@ -158,6 +159,8 @@ def main(args):
if args.do_train:
train_program = fluid.Program()
if args.random_seed is not None:
train_program.random_seed = args.random_seed
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
train_ret = create_model(
......@@ -232,9 +235,11 @@ def main(args):
print("Num train examples: %d" % num_train_examples)
print("Max train steps: %d" % max_train_steps)
ce_info = []
batch_id = 0
for epoch_id in range(args.epoch):
train_ret["pyreader"].start()
ce_time = 0
try:
while True:
start_time = time.time()
......@@ -254,6 +259,8 @@ def main(args):
batch_id += 1
print("[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f " % (
batch_id, avg_cost, precision, recall, f1_score, end_time - start_time))
ce_time += end_time - start_time
ce_info.append([ce_time, avg_cost, precision, recall, f1_score])
# save checkpoints
if (batch_id % args.save_model_per_batches == 0):
......@@ -269,6 +276,32 @@ def main(args):
fluid.io.save_persistables(exe, save_path, train_program)
train_ret["pyreader"].reset()
# break?
if args.do_train and args.enable_ce:
card_num = get_cards()
ce_cost = 0
ce_f1 = 0
ce_p = 0
ce_r = 0
ce_time = 0
try:
ce_time = ce_info[-2][0]
ce_cost = ce_info[-2][1]
ce_p = ce_info[-2][2]
ce_r = ce_info[-2][3]
ce_f1 = ce_info[-2][4]
except:
print("ce info error")
print("kpis\teach_step_duration_card%s\t%s" %
(card_num, ce_time))
print("kpis\ttrain_cost_card%s\t%f" %
(card_num, ce_cost))
print("kpis\ttrain_precision_card%s\t%f" %
(card_num, ce_p))
print("kpis\ttrain_recall_card%s\t%f" %
(card_num, ce_r))
print("kpis\ttrain_f1_card%s\t%f" %
(card_num, ce_f1))
# only test
if args.do_test:
......@@ -292,5 +325,13 @@ def main(args):
break
def get_cards():
    """Return the number of devices listed in ``CUDA_VISIBLE_DEVICES``.

    The variable is read as a comma-separated list. Blank entries (from a
    trailing comma, doubled commas, or a whitespace-only value) are not
    counted, so ``"0,1,"`` yields 2 rather than 3.

    Returns:
        int: the number of non-blank entries, or 0 when the variable is
        unset or empty.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # ''.split(',') yields [''], which the blank-entry filter turns into 0.
    return sum(1 for card in visible.split(',') if card.strip())
if __name__ == "__main__":
    # `args` is the module-level result of parser.parse_args().
    main(args)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册