提交 70f9601b 编写于 作者: G guosheng

Refine Transformer by following comments

上级 ef88e63f
......@@ -16,7 +16,7 @@ class TrainTaskConfig(object):
warmup_steps = 4000
# the flag indicating whether to use the average loss or the sum loss when training.
use_avg = False
use_avg_cost = False
# the directory for saving trained models.
model_dir = "trained_models"
......
......@@ -115,7 +115,7 @@ def main():
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
optimizer.minimize(avg_cost if TrainTaskConfig.use_avg else sum_cost)
optimizer.minimize(avg_cost if TrainTaskConfig.use_avg_cost else sum_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
......@@ -184,9 +184,9 @@ def main():
val_sum_cost, val_avg_cost = test(exe)
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
print(
"epoch: %d, val sum loss: %f, val avg loss: %f, val ppl: %f, consumed %fs"
% (pass_id, val_sum_cost, val_avg_cost,
print("epoch: %d, val sum loss: %f, val avg loss: %f, val ppl: %f, "
"consumed %fs" %
(pass_id, val_sum_cost, val_avg_cost,
np.exp([min(val_avg_cost, 100)]), time_consumed))
fluid.io.save_inference_model(
os.path.join(TrainTaskConfig.model_dir,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册