提交 5165c4c2 编写于 作者: G guosheng

Fix ce for multi-devices in Transformer

上级 98a3f9a2
......@@ -7,14 +7,22 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared in models in some way!!!!
# KPI objects for this model. Each is constructed from a name string, two
# numeric arguments and `actived=True`.
# NOTE(review): the meaning of the two numeric arguments is defined in kpi.py,
# which is not visible here -- presumably a tolerance and a skip count; confirm.
# NOTE(review): the name strings presumably have to match the
# "kpis\t<name>\t<value>" lines printed by the training loop when
# `args.enable_ce` is set -- verify against the CE log parser.
# NOTE(review): this text is a diff rendering without +/- markers; judging by
# the hunk header (-7,14 +7,22) the three un-suffixed KPIs below are likely
# the pre-change definitions -- confirm before relying on them.
train_cost_kpi = CostKpi('train_cost', 0.01, 0, actived=True)
test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
# Per-device-count variants: the `_card1` / `_card4` suffixes line up with the
# `_card%d` names that the training loop formats with `dev_count`.
train_cost_card1_kpi = CostKpi('train_cost_card1', 0.01, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
'train_duration_card1', 0.06, 0, actived=True)
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.01, 0, actived=True)
test_cost_card4_kpi = CostKpi('test_cost_card4', 0.005, 0, actived=True)
train_duration_card4_kpi = DurationKpi(
'train_duration_card4', 0.06, 0, actived=True)
# List collecting every KPI object defined above.
# NOTE(review): presumably consumed by the CE framework -- confirm.
tracking_kpis = [
train_cost_kpi,
test_cost_kpi,
train_duration_kpi,
train_cost_card1_kpi,
test_cost_card1_kpi,
train_duration_card1_kpi,
train_cost_card4_kpi,
test_cost_card4_kpi,
train_duration_card4_kpi,
]
......
......@@ -463,9 +463,9 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
"pass_" + str(pass_id) + ".infer.model"),
data_input_names[:-2] + util_input_names, [predict], exe)
if args.enable_ce: # For CE
print("kpis\ttrain_cost\t%f" % total_avg_cost)
print("kpis\ttest_cost\t%f" % val_avg_cost)
print("kpis\ttrain_duration\t%f" % time_consumed)
print("kpis\ttrain_cost_card%d\t%f" % (dev_count, total_avg_cost))
print("kpis\ttest_cost_card%d\t%f" % (dev_count, val_avg_cost))
print("kpis\ttrain_duration_card%d\t%f" % (dev_count, time_consumed))
def train(args):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册