Unverified · Commit fd2ff205 · Authored by H hong · Committed by GitHub

add words/sec; test=develop (#4878)

Parent 00b77965
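The patch below reports training throughput as words processed per wall-clock second within each logging interval, resetting the counter and the timer after every report. A minimal sketch of that pattern (the class name and API here are illustrative, not part of the patch):

```python
import time

class IntervalSpeedMeter:
    """Words/sec over a reporting interval, as the patch computes it."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Start a fresh interval: zero the word counter, restart the clock.
        self.word_count = 0.0
        self.start_time = time.time()

    def update(self, num_words):
        # Accumulate words (tokens) processed by the latest batch.
        self.word_count += num_words

    def words_per_sec(self):
        # Throughput for the current interval so far.
        return self.word_count / (time.time() - self.start_time)
```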
@@ -158,6 +158,7 @@ def main():
         total_loss = 0
         word_count = 0.0
         batch_times = []
+        interval_time_start = time.time()
         batch_start = time.time()
         for batch_id, batch in enumerate(train_data_iter):
@@ -177,13 +178,15 @@
             batch_times.append(train_batch_cost)
             if batch_id > 0 and batch_id % 100 == 0:
                 print(
-                    "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f s, reader_cost: %.5f s"
+                    "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f s, reader_cost: %.5f s, speed: %.5f words/s"
                     % (epoch_id, batch_id, np.exp(total_loss.numpy() /
                                                   word_count),
-                       train_batch_cost, batch_reader_end - batch_start))
+                       train_batch_cost, batch_reader_end - batch_start,
+                       word_count / (time.time() - interval_time_start)))
                 ce_ppl.append(np.exp(total_loss.numpy() / word_count))
                 total_loss = 0.0
                 word_count = 0.0
+                interval_time_start = time.time()
             batch_start = time.time()
         train_epoch_cost = time.time() - epoch_start
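In the seq2seq loop above, the existing word_count accumulator is paired with the new interval_time_start timestamp, and both are reset every 100 batches so each report covers only the latest interval. A self-contained approximation of that pattern, with fake_batches as a hypothetical stand-in for train_data_iter:

```python
import time
import numpy as np

def fake_batches(n=300):
    # Stand-in for train_data_iter: yields (batch_size, seq_len) token arrays.
    for _ in range(n):
        yield np.zeros((32, 20), dtype="int64")

word_count = 0.0
interval_time_start = time.time()
for batch_id, batch in enumerate(fake_batches()):
    word_count += batch.size  # words consumed by this batch
    if batch_id > 0 and batch_id % 100 == 0:
        speed = word_count / (time.time() - interval_time_start)
        print("-- Batch:[%d]; speed: %.5f words/s" % (batch_id, speed))
        # Reset both so the next report covers only the next 100 batches.
        word_count = 0.0
        interval_time_start = time.time()
```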
@@ -155,6 +155,7 @@ def do_train(args):
         batch_id = 0
         batch_start = time.time()
+        interval_word_num = 0.0
         for input_data in train_loader():
             if args.max_iter and step_idx == args.max_iter:  # NOTE: used for benchmark
                 return
@@ -163,6 +164,7 @@ def do_train(args):
             (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
              trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
              lbl_weight) = input_data
             logits = transformer(src_word, src_pos, src_slf_attn_bias,
                                  trg_word, trg_pos, trg_slf_attn_bias,
                                  trg_src_attn_bias)
@@ -180,6 +182,7 @@ def do_train(args):
             optimizer.minimize(avg_cost)
             transformer.clear_gradients()
+            interval_word_num += np.prod(src_word.shape)
             if step_idx % args.print_step == 0:
                 total_avg_cost = avg_cost.numpy() * trainer_count
@@ -193,14 +196,18 @@ def do_train(args):
                 else:
                     train_avg_batch_cost = args.print_step / (
                         time.time() - batch_start)
+                    word_speed = interval_word_num / (
+                        time.time() - batch_start)
                     logger.info(
                         "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
-                        "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s"
-                        % (step_idx, pass_id, batch_id, total_avg_cost,
-                           total_avg_cost - loss_normalizer,
-                           np.exp([min(total_avg_cost, 100)]),
-                           train_avg_batch_cost))
+                        "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s, "
+                        "words speed: %0.2f words/s" %
+                        (step_idx, pass_id, batch_id, total_avg_cost,
+                         total_avg_cost - loss_normalizer,
+                         np.exp([min(total_avg_cost, 100)]),
+                         train_avg_batch_cost, word_speed))
                 batch_start = time.time()
+                interval_word_num = 0.0
                 if step_idx % args.save_step == 0 and step_idx != 0:
                     # validation
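The Transformer loop above derives the interval word count from np.prod(src_word.shape), i.e. batch_size * max_src_len, so padded positions count toward throughput: the metric measures tensor elements per second rather than non-pad words. A self-contained sketch with made-up shapes and a hard-coded print_step standing in for args.print_step:

```python
import time
import numpy as np

print_step = 100  # stand-in for args.print_step
batch_start = time.time()
interval_word_num = 0.0
for step_idx in range(1, 301):
    # Hypothetical source batch; in the patch src_word holds source token ids.
    src_word = np.zeros((64, 256), dtype="int64")  # [batch_size, max_src_len]
    interval_word_num += np.prod(src_word.shape)   # 64 * 256, padding included
    if step_idx % print_step == 0:
        word_speed = interval_word_num / (time.time() - batch_start)
        print("step_idx: %d, words speed: %0.2f words/s"
              % (step_idx, word_speed))
        # Restart the interval for the next print_step steps.
        batch_start = time.time()
        interval_word_num = 0.0
```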