未验证 提交 69557e42 编写于 作者: H hong 提交者: GitHub

Add tokens-per-second reporting to the Transformer training log (#4874)

* Add token throughput (tokens/sec) to the training log; test=develop

* Change np.array to np.asarray to avoid an unnecessary data copy; test=develop
上级 8a31b1ca
......@@ -175,6 +175,7 @@ def do_train(args):
step_idx = 0
total_batch_num = 0 # this is for benchmark
total_batch_token_num = 0 # this is for benchmark word count
for pass_id in range(args.epoch):
pass_start_time = time.time()
input_field.loader.start()
......@@ -185,12 +186,12 @@ def do_train(args):
return
try:
outs = exe.run(compiled_train_prog,
fetch_list=[sum_cost.name, token_num.name]
if step_idx % args.print_step == 0 else [])
fetch_list=[sum_cost.name, token_num.name])
total_batch_token_num += np.asarray(outs[1]).sum()
if step_idx % args.print_step == 0:
sum_cost_val, token_num_val = np.array(outs[0]), np.array(
outs[1])
sum_cost_val, token_num_val = np.asarray(outs[
0]), np.asarray(outs[1])
# sum the cost from multi-devices
total_sum_cost = sum_cost_val.sum()
total_token_num = token_num_val.sum()
......@@ -207,13 +208,17 @@ def do_train(args):
else:
logging.info(
"step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
"normalized loss: %f, ppl: %f, speed: %.2f step/s" %
(step_idx, pass_id, batch_id, total_avg_cost,
total_avg_cost - loss_normalizer,
np.exp([min(total_avg_cost, 100)]),
args.print_step / (time.time() - avg_batch_time)))
"normalized loss: %f, ppl: %f, batch speed: %.2f steps/s, token speed: %.2f words/sec"
% (step_idx, pass_id, batch_id, total_avg_cost,
total_avg_cost - loss_normalizer,
np.exp([min(total_avg_cost, 100)]),
args.print_step / (time.time() - avg_batch_time),
total_batch_token_num /
(time.time() - avg_batch_time)))
avg_batch_time = time.time()
total_batch_token_num = 0
if step_idx % args.save_step == 0 and step_idx != 0:
if args.save_model_path:
model_path = os.path.join(args.save_model_path,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册