提交 f183b3fc 编写于 作者: Z zhengya01 提交者: kolinwei

add ce for auto_dialogue_evaluation (#2237)

上级 f35b0924
#!/bin/sh
# Continuous-evaluation (CE) driver: trains the matching model once on a
# single GPU and once on four GPUs, piping each training log into _ce.py
# so the emitted "kpis" lines are recorded.

# CE_MODE_X makes main.py print the KPI lines at the end of training.
export CE_MODE_X=ce
export FLAGS_eager_delete_tensor_gb=0.0

# run_ce_training DEVICES
#   Expose the given comma-separated GPU ids and run one training pass,
#   feeding its stdout to the KPI collector.
run_ce_training() {
    CUDA_VISIBLE_DEVICES=$1
    export CUDA_VISIBLE_DEVICES
    python -u main.py \
        --do_train True \
        --use_cuda \
        --save_path model_files_tmp/matching_pretrained \
        --train_path data/unlabel_data/train.ids \
        --val_path data/unlabel_data/val.ids \
        --print_step 3 \
        --num_scan_data 3 | python _ce.py
}

# Single-card run, then four-card run.
run_ce_training 0
run_ce_training 0,1,2,3
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI objects recorded by the continuous-evaluation run: one training-loss
# and one training-duration KPI for each of the 1-card and 4-card runs.
# The first argument is the KPI name; log_to_ce() looks KPIs up by this
# name, so it must match the name printed on the "kpis" log lines.
# NOTE(review): the two numeric arguments (0.03 / 0.01 and 0) are presumably
# threshold-style settings defined by the external `kpi` module -- confirm
# their exact meaning against that module.
train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True)
train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True)
train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True)
train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True)
# Every KPI that log_to_ce() is allowed to receive records for.
tracking_kpis = [
train_loss_card1,
train_loss_card4,
train_duration_card1,
train_duration_card4,
]
def parse_log(log):
    '''
    Yield (kpi_name, kpi_value) pairs found in a training log.

    The log is split on newlines. Each line is stripped, split on TAB
    characters, and the resulting field list is echoed to stdout.
    Only lines made of exactly three fields whose first field is the
    literal marker "kpis" produce a result, for example:

        kpis<TAB>train_loss_card1<TAB>0.25

    yields ("train_loss_card1", 0.25). All other lines are ignored.
    The value field is converted with float(), so a non-numeric value
    on a "kpis" line raises ValueError.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Anything that is not a well-formed "kpis" record is skipped.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
def log_to_ce(log):
    '''
    Record every KPI found in `log` on the matching tracked KPI object.

    Builds a name -> KPI lookup from `tracking_kpis`, then for each
    (name, value) pair produced by parse_log() prints the pair, adds the
    value as a record on the KPI with that name and persists that KPI.
    A KPI name that is not in `tracking_kpis` raises KeyError.
    '''
    kpi_tracker = {kpi.name: kpi for kpi in tracking_kpis}
    for kpi_name, kpi_value in parse_log(log):
        print(kpi_name, kpi_value)
        tracked = kpi_tracker[kpi_name]
        tracked.add_record(kpi_value)
        # Persist right after each record, as the records arrive.
        tracked.persist()
if __name__ == '__main__':
    # The training log is piped in on stdin; read it all, then record KPIs.
    log_to_ce(sys.stdin.read())
...@@ -167,6 +167,7 @@ def train(args): ...@@ -167,6 +167,7 @@ def train(args):
begin_time = time.time() begin_time = time.time()
sum_cost = 0 sum_cost = 0
ce_cost = 0
for batch in train_batches: for batch in train_batches:
if (args.save_path is not None) and (global_step % args.save_step == 0): if (args.save_path is not None) and (global_step % args.save_step == 0):
best_recall = save_exe(global_step, best_recall) best_recall = save_exe(global_step, best_recall)
...@@ -174,6 +175,7 @@ def train(args): ...@@ -174,6 +175,7 @@ def train(args):
cost = train_with_feed(batch) cost = train_with_feed(batch)
global_step += 1 global_step += 1
sum_cost += cost.mean() sum_cost += cost.mean()
ce_cost = cost.mean()
if global_step % args.print_step == 0: if global_step % args.print_step == 0:
print('training step %s avg loss %s' % (global_step, sum_cost / args.print_step)) print('training step %s avg loss %s' % (global_step, sum_cost / args.print_step))
...@@ -183,6 +185,10 @@ def train(args): ...@@ -183,6 +185,10 @@ def train(args):
train_time += pass_time_cost train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}" print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost)) .format(epoch, "%2.2f sec" % pass_time_cost))
if "CE_MODE_X" in os.environ and epoch == args.num_scan_data - 1:
card_num = get_cards()
print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
print("kpis\ttrain_loss_card%s\t%s" % (card_num, ce_cost))
def finetune(args): def finetune(args):
...@@ -436,6 +442,14 @@ def infer(args): ...@@ -436,6 +442,14 @@ def infer(args):
(args.init_model, out_path, t1 - t0)) (args.init_model, out_path, t1 - t0))
def get_cards():
    """Return the number of GPU cards listed in CUDA_VISIBLE_DEVICES.

    The environment variable is read as a comma-separated list of device
    ids. The result is used as the suffix of the KPI names printed in CE
    mode (e.g. ``train_loss_card4``).

    Returns:
        int: number of non-empty entries in CUDA_VISIBLE_DEVICES, or 0
        when the variable is unset, empty, or contains no entries.
    """
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Count only real entries: a trailing or doubled comma ("0,1,") or a
    # whitespace-only value must not be counted as an extra card.
    return len([card for card in cards.split(',') if card.strip()])
def main(): def main():
""" """
main main
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册